diff --git a/generate/requirements.txt b/generate/requirements.txt
index 70b3c18a1ea140e1efee581fceb07b41b29e8e78..953304809f461249dd0ba8ed14eb644029e799eb 100644
--- a/generate/requirements.txt
+++ b/generate/requirements.txt
@@ -1,5 +1,5 @@
 --extra-index-url https://test.pypi.org/simple/
 
-hog @ git+ssh://git@i10git.cs.fau.de/hyteg/hog@516f33ba88809c2174d316883f09221ed0e7ce02
+hog @ git+ssh://git@i10git.cs.fau.de/hyteg/hog@aee9c848e6500bbdecd135482f6ea0e3ef54c78b
 tomli >= 1.1.0 ; python_version < "3.11"
 clang-format
diff --git a/operators.toml b/operators.toml
index 806f89a228fb64cc1de7478b8ff617b4db4443d8..adaae642c0dd4f153448a51c6a5a19ce7e2a3958 100644
--- a/operators.toml
+++ b/operators.toml
@@ -388,3 +388,69 @@ quadrature    = 3
 blending      = "IcosahedralShellMap"
 loop-strategy = "sawtooth"
 optimizations = ["moveconstants", "vectorize", "quadloops"]
+
+[[advection]] # generator request: "advection" form, 2D, P2 trial/test, no blending (IdentityMap)
+trial-space   = "P2"
+test-space    = "P2"
+form-space-args.coefficient_function_space = "P2" # function space of the form's coefficient argument
+form-space-args.velocity_function_space = "P2" # function space of the form's velocity argument
+dimensions    = [2] # 2D only; no 3D variant is requested
+quadrature    = 3
+loop-strategy = "sawtooth"
+optimizations = ["moveconstants", "vectorize", "quadloops"]
+blending      = "IdentityMap" # presumably yields operators/advection/P2ElementwiseAdvection.* - TODO confirm naming scheme
+
+[[advection]] # same request as above, but with AnnulusMap blending
+trial-space   = "P2"
+test-space    = "P2"
+form-space-args.coefficient_function_space = "P2"
+form-space-args.velocity_function_space = "P2"
+dimensions    = [2]
+quadrature    = 3
+loop-strategy = "sawtooth"
+optimizations = ["moveconstants", "vectorize", "quadloops"]
+blending      = "AnnulusMap" # presumably yields operators/advection/P2ElementwiseAdvectionAnnulusMap.* - TODO confirm
+
+[[supg_diffusion]] # generator request: "supg_diffusion" form, 2D, P2 trial/test, no blending (IdentityMap)
+trial-space   = "P2"
+test-space    = "P2"
+form-space-args.diffusivityXdelta_function_space = "P2" # presumably the product diffusivity * SUPG delta as one P2 function - TODO confirm against the form
+form-space-args.velocity_function_space = "P2" # function space of the form's velocity argument
+dimensions    = [2] # 2D only; no 3D variant is requested
+quadrature    = 3
+loop-strategy = "sawtooth"
+optimizations = ["moveconstants", "vectorize", "quadloops"]
+blending      = "IdentityMap" # output presumably lands in operators/supg_diffusion - TODO confirm
+
+[[supg_diffusion]] # same request as above, but with AnnulusMap blending
+trial-space   = "P2"
+test-space    = "P2"
+form-space-args.diffusivityXdelta_function_space = "P2"
+form-space-args.velocity_function_space = "P2"
+dimensions    = [2]
+quadrature    = 3
+loop-strategy = "sawtooth"
+optimizations = ["moveconstants", "vectorize", "quadloops"]
+blending      = "AnnulusMap"
+
+[[supg_advection]] # generator request: "supg_advection" form, 2D, P2 trial/test, no blending (IdentityMap)
+trial-space   = "P2"
+test-space    = "P2"
+form-space-args.coefficient_function_space = "P2" # function space of the form's coefficient argument
+form-space-args.velocity_function_space = "P2" # function space of the form's velocity argument
+dimensions    = [2] # 2D only; no 3D variant is requested
+quadrature    = 3
+loop-strategy = "sawtooth"
+optimizations = ["moveconstants", "vectorize", "quadloops"]
+blending      = "IdentityMap" # output presumably lands in operators/supg_advection - TODO confirm
+
+[[supg_advection]] # same request as above, but with AnnulusMap blending
+trial-space   = "P2"
+test-space    = "P2"
+form-space-args.coefficient_function_space = "P2"
+form-space-args.velocity_function_space = "P2"
+dimensions    = [2]
+quadrature    = 3
+loop-strategy = "sawtooth"
+optimizations = ["moveconstants", "vectorize", "quadloops"]
+blending      = "AnnulusMap"
diff --git a/operators/CMakeLists.txt b/operators/CMakeLists.txt
index b8a33d29263fbe077fe4628a9ddde5760822171c..ceb4c8aaa70dcc44e6129a7dfed4b786f8cdf47c 100644
--- a/operators/CMakeLists.txt
+++ b/operators/CMakeLists.txt
@@ -7,6 +7,7 @@ endif()
 
 add_compile_options( "-Wno-unused-variable" )
 
+add_subdirectory(advection)
 add_subdirectory(curl_curl)
 add_subdirectory(diffusion)
 add_subdirectory(div_k_grad)
@@ -18,3 +19,5 @@ add_subdirectory(gradient)
 add_subdirectory(k_mass)
 add_subdirectory(mass)
 add_subdirectory(shear_heating)
+add_subdirectory(supg_advection)
+add_subdirectory(supg_diffusion)
diff --git a/operators/advection/CMakeLists.txt b/operators/advection/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..825f65cbc0b131209495b7914e44508bb89ffb5b
--- /dev/null
+++ b/operators/advection/CMakeLists.txt
@@ -0,0 +1,52 @@
+add_library( opgen-advection
+   # Operator classes (IdentityMap and AnnulusMap variants); the kernels are added per branch below.
+   P2ElementwiseAdvection.cpp
+   P2ElementwiseAdvection.hpp
+   P2ElementwiseAdvectionAnnulusMap.cpp
+   P2ElementwiseAdvectionAnnulusMap.hpp
+)
+# Kernel selection: avx/ kernels only for AVX builds in double precision, noarch/ kernels otherwise.
+if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
+   target_sources(opgen-advection PRIVATE
+      # apply and inverse-diagonal kernels come from avx/; toMatrix kernels are taken from noarch/ here too.
+      avx/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp
+      avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp
+      noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp
+   )
+   # Only the avx/ sources are compiled with the native flags.
+   set_source_files_properties(
+
+      avx/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp
+      avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp
+      # Quoted so an empty or multi-element flag list stays one property value instead of breaking the PROPERTIES pairs.
+      PROPERTIES COMPILE_OPTIONS "${HYTEG_COMPILER_NATIVE_FLAGS}"
+   )
+else()
+   if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY)
+      message(WARNING "AVX vectorization only available in double precision. Using scalar kernels.")
+   endif()
+   # Scalar fallback: every kernel comes from noarch/.
+   target_sources(opgen-advection PRIVATE
+
+      noarch/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp
+      noarch/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp
+      noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp
+   )
+endif()
+# Usage requirements are PUBLIC, so they propagate to targets linking opgen-advection.
+if (HYTEG_BUILD_WITH_PETSC)
+   target_link_libraries(opgen-advection PUBLIC PETSc::PETSc)
+endif ()
+if (WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT)
+    target_compile_features(opgen-advection PUBLIC cxx_std_23)
+else ()
+    target_compile_features(opgen-advection PUBLIC cxx_std_17)
+endif ()
diff --git a/operators/advection/P2ElementwiseAdvection.cpp b/operators/advection/P2ElementwiseAdvection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3b60e138507e06439202de824e3d38e9d99d63fa
--- /dev/null
+++ b/operators/advection/P2ElementwiseAdvection.cpp
@@ -0,0 +1,391 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseAdvection.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseAdvection::P2ElementwiseAdvection( const std::shared_ptr< PrimitiveStorage >& storage, // forwarded to the Operator base
+                                                size_t                                     minLevel, // forwarded to the Operator base
+                                                size_t                                     maxLevel, // forwarded to the Operator base
+                                                const P2Function< real_t >&                _cp, // kept in member cp
+                                                const P2Function< real_t >&                _ux, // kept in member ux (presumably x-component of the velocity - TODO confirm)
+                                                const P2Function< real_t >&                _uy ) // kept in member uy (presumably y-component of the velocity - TODO confirm)
+: Operator( storage, minLevel, maxLevel )
+, cp( _cp )
+, ux( _ux )
+, uy( _uy )
+{} // nothing else to do: the inverse diagonal is only allocated in computeInverseDiagonalOperatorValues()
+
+void P2ElementwiseAdvection::apply( const P2Function< real_t >& src, // Invokes the generated 2D apply kernel once per macro face with the face data of src and dst.
+                                    const P2Function< real_t >& dst, // const reference, but its face data is written through raw pointers below
+                                    uint_t                      level, // level whose data pointers are fetched and processed
+                                    DoFType                     flag, // DoFs zeroed on Replace; DoFType::All ^ flag is handed to the additive communication
+                                    UpdateType                  updateType ) const // Replace: flagged dst DoFs are zeroed before the kernels run
+{
+   this->startTiming( "apply" );
+
+   // Make sure that the halos of src and of the coefficient functions (cp, ux, uy) are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // storages with global cells (3D) are not supported by this operator
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions (vertex- and edge-DoF arrays of this face)
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cpVertex  = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cpEdge    = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxVertex  = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxEdge    = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyVertex  = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyEdge    = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         // (vertex DoFs on the face boundary and all edge DoFs that are not inner edge DoFs).
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); // same count, as real_t
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0]; // face vertex 0, component 0
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_P2ElementwiseAdvection_macro_2D(
+
+             _data_cpEdge,
+             _data_cpVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_uxEdge,
+             _data_uxVertex,
+             _data_uyEdge,
+             _data_uyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float );
+
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      // (vertex DoFs: face -> edge and face -> vertex; edge DoFs: face -> edge).
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P2ElementwiseAdvection::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, // Invokes the generated 2D toMatrix kernel once per macro face, passing mat through.
+                                       const P2Function< idx_t >&                  src, // idx_t-valued function; presumably the column numbering - TODO confirm
+                                       const P2Function< idx_t >&                  dst, // idx_t-valued function; presumably the row numbering - TODO confirm
+                                       uint_t                                      level, // level whose data pointers are fetched and processed
+                                       DoFType                                     flag ) const // only checked to emit a warning; otherwise unused
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided! A warning is logged on root if it is anything but All.
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      cp.communicate< Face, Cell >( level );
+      cp.communicate< Edge, Cell >( level );
+      cp.communicate< Vertex, Cell >( level );
+      ux.communicate< Face, Cell >( level );
+      ux.communicate< Edge, Cell >( level );
+      ux.communicate< Vertex, Cell >( level );
+      uy.communicate< Face, Cell >( level );
+      uy.communicate< Edge, Cell >( level );
+      uy.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." ); // storages with global cells (3D) are not supported by this operator
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" ); // only the coefficient functions are synced; src/dst are not
+      communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data (idx_t arrays for src/dst, real_t arrays for cp/ux/uy)
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cpVertex  = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cpEdge    = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxVertex  = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxEdge    = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyVertex  = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyEdge    = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); // same count, as real_t
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0]; // face vertex 0, component 0
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_P2ElementwiseAdvection_macro_2D(
+
+             _data_cpEdge,
+             _data_cpVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_uxEdge,
+             _data_uxVertex,
+             _data_uyEdge,
+             _data_uyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float );
+
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+void P2ElementwiseAdvection::computeInverseDiagonalOperatorValues() // Fills invDiag_ on every level from minLevel_ to maxLevel_ and inverts it elementwise.
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr ) // allocate the result function on first use only
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level ); // start each level from zero before the per-face kernels and additive communication
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         cp.communicate< Face, Cell >( level );
+         cp.communicate< Edge, Cell >( level );
+         cp.communicate< Vertex, Cell >( level );
+         ux.communicate< Face, Cell >( level );
+         ux.communicate< Edge, Cell >( level );
+         ux.communicate< Vertex, Cell >( level );
+         uy.communicate< Face, Cell >( level );
+         uy.communicate< Edge, Cell >( level );
+         uy.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." ); // storages with global cells (3D) are not supported by this operator
+         ( *invDiag_ ).invertElementwise( level ); // NOTE(review): follows WALBERLA_ABORT, so presumably never executed - confirm the abort cannot return
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the actual numerical data (invDiag_ is the kernel's output, cp/ux/uy are its coefficient inputs)
+            real_t* _data_invDiag_Vertex =
+                face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_cpVertex     = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_cpEdge       = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uxVertex     = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uxEdge       = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uyVertex     = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uyEdge       = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); // same count, as real_t
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0]; // face vertex 0, component 0
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+            this->timingTree_->start( "kernel" );
+
+            computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D(
+
+                _data_cpEdge,
+                _data_cpVertex,
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_uxEdge,
+                _data_uxVertex,
+                _data_uyEdge,
+                _data_uyVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float );
+
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         // (vertex DoFs: face -> edge and face -> vertex; edge DoFs: face -> edge).
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing the apply() also for the respective
+         //       lower dimensional primitives!
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+         ( *invDiag_ ).invertElementwise( level ); // inversion happens only after the additive communication has finished
+      }
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+std::shared_ptr< P2Function< real_t > > P2ElementwiseAdvection::getInverseDiagonalValues() const // Returns the shared invDiag_ pointer as is.
+{
+   return invDiag_; // may still be null: invDiag_ is only allocated in computeInverseDiagonalOperatorValues()
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/P2ElementwiseAdvection.hpp b/operators/advection/P2ElementwiseAdvection.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..88d58e6a7afa3ccf0fa354bca06954bdcf8a5b47
--- /dev/null
+++ b/operators/advection/P2ElementwiseAdvection.hpp
@@ -0,0 +1,183 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// advection operator which needs to be used in combination with SUPG
+///
+/// Geometry map:    IdentityMap
+///
+/// Weak formulation
+///
+///     T: trial function (scalar space: Lagrange, degree: 2)
+///     s: test function  (scalar space: Lagrange, degree: 2)
+///     u: velocity function (vectorial space: Lagrange, degree: 2)
+///
+///     ∫ cp ( u · ∇T ) s
+
+class P2ElementwiseAdvection : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                               public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   P2ElementwiseAdvection( const std::shared_ptr< PrimitiveStorage >& storage,
+                           size_t                                     minLevel,
+                           size_t                                     maxLevel,
+                           const P2Function< real_t >&                _cp,
+                           const P2Function< real_t >&                _ux,
+                           const P2Function< real_t >&                _uy );
+
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+   void computeInverseDiagonalOperatorValues();
+
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const;
+
+ protected:
+ private:
+   /// Integral: P2ElementwiseAdvection
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    404     472      12       0      0              0                 0              1
+   void apply_P2ElementwiseAdvection_macro_2D( real_t* RESTRICT _data_cpEdge,
+                                               real_t* RESTRICT _data_cpVertex,
+                                               real_t* RESTRICT _data_dstEdge,
+                                               real_t* RESTRICT _data_dstVertex,
+                                               real_t* RESTRICT _data_srcEdge,
+                                               real_t* RESTRICT _data_srcVertex,
+                                               real_t* RESTRICT _data_uxEdge,
+                                               real_t* RESTRICT _data_uxVertex,
+                                               real_t* RESTRICT _data_uyEdge,
+                                               real_t* RESTRICT _data_uyVertex,
+                                               real_t           macro_vertex_coord_id_0comp0,
+                                               real_t           macro_vertex_coord_id_0comp1,
+                                               real_t           macro_vertex_coord_id_1comp0,
+                                               real_t           macro_vertex_coord_id_1comp1,
+                                               real_t           macro_vertex_coord_id_2comp0,
+                                               real_t           macro_vertex_coord_id_2comp1,
+                                               int64_t          micro_edges_per_macro_edge,
+                                               real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseAdvection (kernel over raw cp/ux/uy arrays, idx_t src/dst arrays and the matrix proxy `mat`)
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
+   /// - operations per element (column headers, then the counts):
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    368     436      12       0      0              0                 0              4
+   void toMatrix_P2ElementwiseAdvection_macro_2D( real_t* RESTRICT                     _data_cpEdge,
+                                                  real_t* RESTRICT                     _data_cpVertex,
+                                                  idx_t* RESTRICT                      _data_dstEdge,
+                                                  idx_t* RESTRICT                      _data_dstVertex,
+                                                  idx_t* RESTRICT                      _data_srcEdge,
+                                                  idx_t* RESTRICT                      _data_srcVertex,
+                                                  real_t* RESTRICT                     _data_uxEdge,
+                                                  real_t* RESTRICT                     _data_uxVertex,
+                                                  real_t* RESTRICT                     _data_uyEdge,
+                                                  real_t* RESTRICT                     _data_uyVertex,
+                                                  real_t                               macro_vertex_coord_id_0comp0,
+                                                  real_t                               macro_vertex_coord_id_0comp1,
+                                                  real_t                               macro_vertex_coord_id_1comp0,
+                                                  real_t                               macro_vertex_coord_id_1comp1,
+                                                  real_t                               macro_vertex_coord_id_2comp0,
+                                                  real_t                               macro_vertex_coord_id_2comp1,
+                                                  std::shared_ptr< SparseMatrixProxy > mat,
+                                                  int64_t                              micro_edges_per_macro_edge,
+                                                  real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseAdvection (kernel over raw cp/ux/uy arrays and the invDiag vertex/edge arrays)
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
+   /// - operations per element (column headers, then the counts):
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    254     316      12       0      0              0                 0              1
+   void computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D( real_t* RESTRICT _data_cpEdge,
+                                                                              real_t* RESTRICT _data_cpVertex,
+                                                                              real_t* RESTRICT _data_invDiag_Edge,
+                                                                              real_t* RESTRICT _data_invDiag_Vertex,
+                                                                              real_t* RESTRICT _data_uxEdge,
+                                                                              real_t* RESTRICT _data_uxVertex,
+                                                                              real_t* RESTRICT _data_uyEdge,
+                                                                              real_t* RESTRICT _data_uyVertex,
+                                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                                              int64_t          micro_edges_per_macro_edge,
+                                                                              real_t micro_edges_per_macro_edge_float ) const;
+
+   std::shared_ptr< P2Function< real_t > > invDiag_;
+   P2Function< real_t >                    cp;
+   P2Function< real_t >                    ux;
+   P2Function< real_t >                    uy;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/P2ElementwiseAdvectionAnnulusMap.cpp b/operators/advection/P2ElementwiseAdvectionAnnulusMap.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c88e9c6b6e7f118575ffcc0220e2f1899bf25528
--- /dev/null
+++ b/operators/advection/P2ElementwiseAdvectionAnnulusMap.cpp
@@ -0,0 +1,448 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseAdvectionAnnulusMap::P2ElementwiseAdvectionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                    size_t                                     minLevel,
+                                                                    size_t                                     maxLevel,
+                                                                    const P2Function< real_t >&                _cp,
+                                                                    const P2Function< real_t >&                _ux,
+                                                                    const P2Function< real_t >&                _uy )
+: Operator( storage, minLevel, maxLevel ) // forwards storage and the level range to the Operator base
+, cp( _cp )                               // member cp is initialised from the caller's coefficient function
+, ux( _ux )                               // member ux is initialised from the caller's velocity x-component
+, uy( _uy )                               // member uy is initialised from the caller's velocity y-component
+{}
+
+// Applies the operator to src on `level`, writing to dst; Replace zeroes flagged dst DoFs first. 2D storages only.
+void P2ElementwiseAdvectionAnnulusMap::apply( const P2Function< real_t >& src,
+                                              const P2Function< real_t >& dst,
+                                              uint_t                      level,
+                                              DoFType                     flag,
+                                              UpdateType                  updateType ) const
+{
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cpVertex  = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cpEdge    = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxVertex  = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxEdge    = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyVertex  = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyEdge    = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Cast the geometry map once per face; the null check and all parameter reads below share the result.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = annulusMap->radRefVertex();
+         real_t radRayVertex = annulusMap->radRayVertex();
+         real_t refVertex_0  = annulusMap->refVertex()[0];
+         real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         real_t refVertex_1  = annulusMap->refVertex()[1];
+         real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         apply_P2ElementwiseAdvectionAnnulusMap_macro_2D(
+             _data_cpEdge,
+             _data_cpVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_uxEdge,
+             _data_uxVertex,
+             _data_uyEdge,
+             _data_uyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      //
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+// Runs the generated toMatrix kernel on every macro-face with mat and the idx_t src/dst data; flag is ignored.
+void P2ElementwiseAdvectionAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                 const P2Function< idx_t >&                  src,
+                                                 const P2Function< idx_t >&                  dst,
+                                                 uint_t                                      level,
+                                                 DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      cp.communicate< Face, Cell >( level );
+      cp.communicate< Edge, Cell >( level );
+      cp.communicate< Vertex, Cell >( level );
+      ux.communicate< Face, Cell >( level );
+      ux.communicate< Edge, Cell >( level );
+      ux.communicate< Vertex, Cell >( level );
+      uy.communicate< Face, Cell >( level );
+      uy.communicate< Edge, Cell >( level );
+      uy.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cpVertex  = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cpEdge    = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxVertex  = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxEdge    = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyVertex  = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyEdge    = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Cast the geometry map once per face; the null check and all parameter reads below share the result.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = annulusMap->radRefVertex();
+         real_t radRayVertex = annulusMap->radRayVertex();
+         real_t refVertex_0  = annulusMap->refVertex()[0];
+         real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         real_t refVertex_1  = annulusMap->refVertex()[1];
+         real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D(
+             _data_cpEdge,
+             _data_cpVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_uxEdge,
+             _data_uxVertex,
+             _data_uyEdge,
+             _data_uyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+// (Re)computes invDiag_ on all levels: zero, run the diagonal kernel per face, communicate additively, invert.
+void P2ElementwiseAdvectionAnnulusMap::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         cp.communicate< Face, Cell >( level );
+         cp.communicate< Edge, Cell >( level );
+         cp.communicate< Vertex, Cell >( level );
+         ux.communicate< Face, Cell >( level );
+         ux.communicate< Edge, Cell >( level );
+         ux.communicate< Vertex, Cell >( level );
+         uy.communicate< Face, Cell >( level );
+         uy.communicate< Edge, Cell >( level );
+         uy.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." );
+         ( *invDiag_ ).invertElementwise( level ); // NOTE(review): runs only if the abort above returns - confirm intent
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the actual numerical data
+            real_t* _data_invDiag_Vertex =
+                face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_cpVertex     = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_cpEdge       = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uxVertex     = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uxEdge       = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uyVertex     = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uyEdge       = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+            // Cast the geometry map once per face; the null check and all parameter reads below share the result.
+            const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+            WALBERLA_CHECK_NOT_NULLPTR(
+                annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+            real_t radRefVertex = annulusMap->radRefVertex();
+            real_t radRayVertex = annulusMap->radRayVertex();
+            real_t refVertex_0  = annulusMap->refVertex()[0];
+            real_t rayVertex_0  = annulusMap->rayVertex()[0];
+            real_t thrVertex_0  = annulusMap->thrVertex()[0];
+            real_t refVertex_1  = annulusMap->refVertex()[1];
+            real_t rayVertex_1  = annulusMap->rayVertex()[1];
+            real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+            this->timingTree_->start( "kernel" );
+            computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D(
+                _data_cpEdge,
+                _data_cpVertex,
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_uxEdge,
+                _data_uxVertex,
+                _data_uyEdge,
+                _data_uyVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float,
+                radRayVertex,
+                radRefVertex,
+                rayVertex_0,
+                rayVertex_1,
+                refVertex_0,
+                refVertex_1,
+                thrVertex_0,
+                thrVertex_1 );
+
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         //
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by also running the diagonal kernel for the respective
+         //       lower dimensional primitives!
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+         ( *invDiag_ ).invertElementwise( level );
+      }
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+std::shared_ptr< P2Function< real_t > > P2ElementwiseAdvectionAnnulusMap::getInverseDiagonalValues() const
+{
+   return invDiag_; // allocated by computeInverseDiagonalOperatorValues(); presumably null before that call - confirm
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/P2ElementwiseAdvectionAnnulusMap.hpp b/operators/advection/P2ElementwiseAdvectionAnnulusMap.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f1f9b111c6b57030fe580644ac9727c00f1f1a12
--- /dev/null
+++ b/operators/advection/P2ElementwiseAdvectionAnnulusMap.hpp
@@ -0,0 +1,208 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Elementwise P2 advection operator on an annulus geometry; it needs to be
+/// used in combination with SUPG.
+///
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     T:  trial function (scalar space: Lagrange, degree: 2)
+///     s:  test function  (scalar space: Lagrange, degree: 2)
+///     u:  velocity function (vectorial space: Lagrange, degree: 2)
+///     cp: coefficient function (scalar space: Lagrange, degree: 2)
+///
+///     ∫ cp ( u · ∇T ) s
+///
+/// The velocity u is handed over as two scalar P2 functions (ux, uy), and only
+/// 2D (triangle) kernels are declared in this class.
+
+class P2ElementwiseAdvectionAnnulusMap : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                         public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   /// Creates the operator on \p storage for the levels \p minLevel to \p maxLevel.
+   ///
+   /// \param storage   primitive storage the operator works on
+   /// \param minLevel  lowest refinement level
+   /// \param maxLevel  highest refinement level
+   /// \param _cp       coefficient function cp of the weak form
+   /// \param _ux       velocity function ux (first component of u)
+   /// \param _uy       velocity function uy (second component of u)
+   ///
+   /// NOTE(review): presumably _cp, _ux and _uy initialise the members cp, ux
+   /// and uy, which are held by value - confirm in the implementation file.
+   P2ElementwiseAdvectionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,
+                                     size_t                                     minLevel,
+                                     size_t                                     maxLevel,
+                                     const P2Function< real_t >&                _cp,
+                                     const P2Function< real_t >&                _ux,
+                                     const P2Function< real_t >&                _uy );
+
+   /// Applies the operator to \p src on \p level and stores the result in \p dst.
+   ///
+   /// \param src         input function
+   /// \param dst         output function (passed as const reference, yet it is the target)
+   /// \param level       refinement level to operate on
+   /// \param flag        selects the DoFs to be treated
+   /// \param updateType  how the result is combined with \p dst; defaults to Replace
+   ///
+   /// NOTE(review): presumably dispatches to the private apply macro kernel for
+   /// every macro face - confirm in the implementation file.
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+
+   /// Assembles the operator on \p level into the sparse matrix proxy \p mat.
+   ///
+   /// \param mat    proxy receiving the matrix entries
+   /// \param src    index-valued function (presumably the column enumeration - TODO confirm)
+   /// \param dst    index-valued function (presumably the row enumeration - TODO confirm)
+   /// \param level  refinement level to assemble on
+   /// \param flag   selects the DoFs to be treated
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+   /// Computes the inverse diagonal of the operator. Not const, since it
+   /// updates internal state (presumably invDiag_ - TODO confirm).
+   void computeInverseDiagonalOperatorValues();
+
+   /// Returns the function holding the inverse diagonal values
+   /// (presumably invDiag_; call computeInverseDiagonalOperatorValues() first - TODO confirm).
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const;
+
+ protected:
+   // intentionally empty
+ private:
+   /// Integral: P2ElementwiseAdvectionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    640     892      20      12      0              0                 0              1
+   ///
+   /// Parameter groups (shared by all three macro kernels of this class):
+   /// - _data_<f>Vertex / _data_<f>Edge: raw arrays with the vertex and edge DoFs
+   ///   of function <f> (cp, src, dst, ux, uy) on one macro face
+   /// - macro_vertex_coord_id_<i>comp<j>: coordinate component j of macro vertex i;
+   ///   the affine micro-element vertices are derived from these
+   /// - micro_edges_per_macro_edge: bound of the loops over the micro elements;
+   ///   micro_edges_per_macro_edge_float is its real_t counterpart used for scaling
+   /// - radRayVertex, radRefVertex, rayVertex_*, refVertex_*, thrVertex_*:
+   ///   geometry parameters of the AnnulusMap blending
+   ///   (NOTE(review): exact meaning is defined by hyteg/geometry/AnnulusMap.hpp)
+   void apply_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t* RESTRICT _data_cpEdge,
+                                                         real_t* RESTRICT _data_cpVertex,
+                                                         real_t* RESTRICT _data_dstEdge,
+                                                         real_t* RESTRICT _data_dstVertex,
+                                                         real_t* RESTRICT _data_srcEdge,
+                                                         real_t* RESTRICT _data_srcVertex,
+                                                         real_t* RESTRICT _data_uxEdge,
+                                                         real_t* RESTRICT _data_uxVertex,
+                                                         real_t* RESTRICT _data_uyEdge,
+                                                         real_t* RESTRICT _data_uyVertex,
+                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                         int64_t          micro_edges_per_macro_edge,
+                                                         real_t           micro_edges_per_macro_edge_float,
+                                                         real_t           radRayVertex,
+                                                         real_t           radRefVertex,
+                                                         real_t           rayVertex_0,
+                                                         real_t           rayVertex_1,
+                                                         real_t           refVertex_0,
+                                                         real_t           refVertex_1,
+                                                         real_t           thrVertex_0,
+                                                         real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseAdvectionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    604     856      20      12      0              0                 0              4
+   ///
+   /// Same parameter layout as the apply kernel, except that the src/dst arrays
+   /// hold idx_t values instead of real_t and the target proxy \p mat is passed
+   /// in addition.
+   void toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t* RESTRICT                     _data_cpEdge,
+                                                            real_t* RESTRICT                     _data_cpVertex,
+                                                            idx_t* RESTRICT                      _data_dstEdge,
+                                                            idx_t* RESTRICT                      _data_dstVertex,
+                                                            idx_t* RESTRICT                      _data_srcEdge,
+                                                            idx_t* RESTRICT                      _data_srcVertex,
+                                                            real_t* RESTRICT                     _data_uxEdge,
+                                                            real_t* RESTRICT                     _data_uxVertex,
+                                                            real_t* RESTRICT                     _data_uyEdge,
+                                                            real_t* RESTRICT                     _data_uyVertex,
+                                                            real_t                               macro_vertex_coord_id_0comp0,
+                                                            real_t                               macro_vertex_coord_id_0comp1,
+                                                            real_t                               macro_vertex_coord_id_1comp0,
+                                                            real_t                               macro_vertex_coord_id_1comp1,
+                                                            real_t                               macro_vertex_coord_id_2comp0,
+                                                            real_t                               macro_vertex_coord_id_2comp1,
+                                                            std::shared_ptr< SparseMatrixProxy > mat,
+                                                            int64_t                              micro_edges_per_macro_edge,
+                                                            real_t                               micro_edges_per_macro_edge_float,
+                                                            real_t                               radRayVertex,
+                                                            real_t                               radRefVertex,
+                                                            real_t                               rayVertex_0,
+                                                            real_t                               rayVertex_1,
+                                                            real_t                               refVertex_0,
+                                                            real_t                               refVertex_1,
+                                                            real_t                               thrVertex_0,
+                                                            real_t                               thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseAdvectionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    490     736      20      12      0              0                 0              1
+   ///
+   /// Same parameter layout as the apply kernel, except that there are no
+   /// src/dst arrays; _data_invDiag_Vertex / _data_invDiag_Edge are the arrays
+   /// of the diagonal function (presumably the kernel's output - TODO confirm).
+   void computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t* RESTRICT _data_cpEdge,
+                                                                                        real_t* RESTRICT _data_cpVertex,
+                                                                                        real_t* RESTRICT _data_invDiag_Edge,
+                                                                                        real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                        real_t* RESTRICT _data_uxEdge,
+                                                                                        real_t* RESTRICT _data_uxVertex,
+                                                                                        real_t* RESTRICT _data_uyEdge,
+                                                                                        real_t* RESTRICT _data_uyVertex,
+                                                                                        real_t  macro_vertex_coord_id_0comp0,
+                                                                                        real_t  macro_vertex_coord_id_0comp1,
+                                                                                        real_t  macro_vertex_coord_id_1comp0,
+                                                                                        real_t  macro_vertex_coord_id_1comp1,
+                                                                                        real_t  macro_vertex_coord_id_2comp0,
+                                                                                        real_t  macro_vertex_coord_id_2comp1,
+                                                                                        int64_t micro_edges_per_macro_edge,
+                                                                                        real_t  micro_edges_per_macro_edge_float,
+                                                                                        real_t  radRayVertex,
+                                                                                        real_t  radRefVertex,
+                                                                                        real_t  rayVertex_0,
+                                                                                        real_t  rayVertex_1,
+                                                                                        real_t  refVertex_0,
+                                                                                        real_t  refVertex_1,
+                                                                                        real_t  thrVertex_0,
+                                                                                        real_t  thrVertex_1 ) const;
+
+   /// Inverse diagonal of the operator, shared with callers of getInverseDiagonalValues()
+   /// (NOTE(review): presumably unset until computeInverseDiagonalOperatorValues() ran - confirm).
+   std::shared_ptr< P2Function< real_t > > invDiag_;
+   /// Coefficient function cp of the weak form, held by value.
+   P2Function< real_t >                    cp;
+   /// Velocity function ux, held by value.
+   P2Function< real_t >                    ux;
+   /// Velocity function uy, held by value.
+   P2Function< real_t >                    uy;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/avx/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp b/operators/advection/avx/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..96bbe2222a72a27518f8b3ff4762b3bb1919305d
--- /dev/null
+++ b/operators/advection/avx/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,1193 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseAdvectionAnnulusMap::apply_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_setzero_pd(); // q_acc_i_j: 36 per-lane accumulators, all cleared to 0.0 before the q loop adds into them
+                __m256d q_acc_0_1 = _mm256_setzero_pd();
+                __m256d q_acc_0_2 = _mm256_setzero_pd();
+                __m256d q_acc_0_3 = _mm256_setzero_pd();
+                __m256d q_acc_0_4 = _mm256_setzero_pd();
+                __m256d q_acc_0_5 = _mm256_setzero_pd();
+                __m256d q_acc_1_0 = _mm256_setzero_pd();
+                __m256d q_acc_1_1 = _mm256_setzero_pd();
+                __m256d q_acc_1_2 = _mm256_setzero_pd();
+                __m256d q_acc_1_3 = _mm256_setzero_pd();
+                __m256d q_acc_1_4 = _mm256_setzero_pd();
+                __m256d q_acc_1_5 = _mm256_setzero_pd();
+                __m256d q_acc_2_0 = _mm256_setzero_pd();
+                __m256d q_acc_2_1 = _mm256_setzero_pd();
+                __m256d q_acc_2_2 = _mm256_setzero_pd();
+                __m256d q_acc_2_3 = _mm256_setzero_pd();
+                __m256d q_acc_2_4 = _mm256_setzero_pd();
+                __m256d q_acc_2_5 = _mm256_setzero_pd();
+                __m256d q_acc_3_0 = _mm256_setzero_pd();
+                __m256d q_acc_3_1 = _mm256_setzero_pd();
+                __m256d q_acc_3_2 = _mm256_setzero_pd();
+                __m256d q_acc_3_3 = _mm256_setzero_pd();
+                __m256d q_acc_3_4 = _mm256_setzero_pd();
+                __m256d q_acc_3_5 = _mm256_setzero_pd();
+                __m256d q_acc_4_0 = _mm256_setzero_pd();
+                __m256d q_acc_4_1 = _mm256_setzero_pd();
+                __m256d q_acc_4_2 = _mm256_setzero_pd();
+                __m256d q_acc_4_3 = _mm256_setzero_pd();
+                __m256d q_acc_4_4 = _mm256_setzero_pd();
+                __m256d q_acc_4_5 = _mm256_setzero_pd();
+                __m256d q_acc_5_0 = _mm256_setzero_pd();
+                __m256d q_acc_5_1 = _mm256_setzero_pd();
+                __m256d q_acc_5_2 = _mm256_setzero_pd();
+                __m256d q_acc_5_3 = _mm256_setzero_pd();
+                __m256d q_acc_5_4 = _mm256_setzero_pd();
+                __m256d q_acc_5_5 = _mm256_setzero_pd();
+                for (int64_t q = 0; q < 4; q += 1) // one pass per entry q = 0..3 of _data_q_p_0 / _data_q_p_1 / _data_q_w (presumably quadrature points and weights -- TODO confirm against the generator)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); // (p_affine_1_0 - p_affine_0_0)*q_p_0 + (p_affine_2_0 - p_affine_0_0)*q_p_1 + p_affine_0_0 : component 0 of the point, per lane
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); // same combination for component 1
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); // tmp_qloop_0^2 + tmp_qloop_3^2
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); // 1 / sqrt(tmp_qloop_5)
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); // tmp_qloop_12 (like tmp_qloop_1, _7 .. _11) is a scalar defined outside this loop, broadcast to all lanes
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); // sqrt(tmp_qloop_5) / tmp_qloop_5^2
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); // ((tmp_qloop_0 - rayVertex_0)*tmp_qloop_1 - (tmp_qloop_3 - rayVertex_1)*tmp_qloop_8)*tmp_qloop_11 + radRayVertex
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); // 3 * tmp_qloop_16 * sqrt(tmp_qloop_5) / tmp_qloop_5^3
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // 4*q0, writing q0 = _data_q_p_0[q] and q1 = _data_q_p_1[q] in the comments below
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*q1
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38); // 4*q0 + 4*q1 - 3
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*q0*q1
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44); // 2*q0^2 - q0
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47); // 2*q1^2 - q1
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38); // -4*q1^2 - 4*q0*q1 + 4*q1
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); // -4*q0^2 - 4*q0*q1 + 4*q0
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47); // 1 - 3*q0 - 3*q1 + 4*q0*q1 + 2*q0^2 + 2*q1^2; tmp_qloop_51/45/48/42/49/50 have the form of the six quadratic (P2) triangle basis functions at (q0, q1)
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0)); // ux_dof_0..5 combined with those six weights
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0)); // uy_dof_0..5 combined with the same weights
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(tmp_qloop_65,tmp_qloop_66);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(tmp_qloop_68,tmp_qloop_69);
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 4*q0 - 8*q1
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 8*q0 - 4*q1
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)));
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); // jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0 (2x2 determinant)
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); // reciprocal of that determinant
+                   const __m256d abs_det_jac_blending = tmp_qloop_21; // NOTE(review): the determinant is assigned as-is, no absolute value is applied here despite the name -- confirm its sign is guaranteed upstream
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); // abs_det_jac_blending * (cp_dof_0..5 combined with the six weights) * _data_q_w[q] * abs_det_jac_affine_GRAY
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_55);
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_55);
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_55);
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_55);
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_55);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); // jac_blending_inv_*: 2x2 inverse formed as the adjugate entries times 1/det (tmp_qloop_22)
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))); // tmp_qloop_52*(inv_0_0*a + inv_1_0*b) + tmp_qloop_53*(inv_0_1*a + inv_1_1*b) with (a, b) = (tmp_qloop_40, tmp_qloop_41); the next five lines repeat this with other (a, b) pairs
+                   const __m256d tmp_qloop_60 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_58),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_59))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_58),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_59))));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_62),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_63))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_62),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_63))));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_67),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_70))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_67),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_70))));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_73),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_74))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_73),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_74))));
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_77),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_78))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_77),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_78))));
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); // NOTE(review): the eight hessian_blending_* values (this line through hessian_blending_1_1_1) are not read again before this loop body closes
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_56); // q_tmp_i_j = (row factor i: tmp_qloop_56/80/81/82/83/84) * (column factor j: tmp_qloop_54/60/64/71/75/79)
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_60);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_64);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_71);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_75);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_79);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_80);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_80);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_80);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_80);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_80);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_80);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_81);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_81);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_81);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_81);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_81);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_81);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_82);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_82);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_82);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_82);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_82);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_82);
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_83);
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_83);
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_83);
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_83);
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_83);
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_83);
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_84);
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_84);
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_84);
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_84);
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_84);
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_84);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); // add this iteration's q_tmp_i_j into the matching q_acc_i_j (36 accumulations)
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); // elMatVec_i = sum over j = 0..5 of q_acc_i_j * src_dof_j, summed left to right; the same pattern repeats for i = 1..5
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); // read-modify-write: load 4 doubles from _data_dstVertex, add elMatVec_0, store back to the same address
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); // same at index + 1; this 4-wide range overlaps the previous store by three elements, so this load has to come after that store
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); // _data_dstVertex entry indexed with ctr_1 + 1 gets elMatVec_2 added
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); // _data_dstEdge at offset 1*(n*(n + 1)/2), n = micro_edges_per_macro_edge, gets elMatVec_3 added
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); // _data_dstEdge at offset 2*(n*(n + 1)/2) gets elMatVec_4 added
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); // _data_dstEdge at offset 0 gets elMatVec_5 added
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // Scalar tail over ctr_0 for the *_GRAY quantities: starts at (micro_edges_per_macro_edge - ctr_1) divided by 4 and multiplied by 4 again, then steps by 1; presumably picks up what the 4-wide loop closing just above leaves over.
             {
            
+                const int64_t phantom_ctr_0 = ctr_0; // ctr_0 is not modified in the body, so (ctr_0 - phantom_ctr_0) is 0 in every index expression below.
+                real_t _data_float_loop_ctr_array_dim_0[4]; // ctr_0+0 .. ctr_0+3 as real_t; only element [0] is read in this scalar loop.
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4]; // ctr_1 as real_t, stored four times; only element [0] is read in this scalar loop.
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // p_affine_<p>_<c>: component c of point p = macro vertex 0 + (ctr_0/n)*(vertex 1 - vertex 0) + (ctr_1/n)*(vertex 2 - vertex 0), with n = micro_edges_per_macro_edge_float.
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // Point 1 uses (1 + ctr_0) in the first offset.
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; // Point 2 uses (1 + ctr_1) in the second offset.
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // Six src values per iteration: dofs 0-2 from _data_srcVertex, dofs 3-5 from _data_srcEdge. cp, ux and uy below use the same twelve index expressions.
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // q_acc_<i>_<j>: 36 accumulators, zeroed here and summed over the q loop below.
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // Four iterations; each reads _data_q_p_0[q], _data_q_p_1[q] and _data_q_w[q] (called x, y and w in the comments below).
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; // Component 0 of p0 + x*(p1 - p0) + y*(p2 - p0).
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; // Component 1 of the same point.
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // Squared Euclidean norm of (tmp_qloop_0, tmp_qloop_3).
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); // 1/norm; NOTE(review): nothing here guards against tmp_qloop_5 == 0 — presumably the geometry excludes the origin, confirm.
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; // tmp_qloop_1 and tmp_qloop_7 .. tmp_qloop_12 are not declared in this loop; they must come from an enclosing scope.
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); // radRayVertex and rayVertex_0/1 are likewise declared outside this loop.
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; // tmp_qloop_23 .. tmp_qloop_36 feed only each other and the hessian_blending_* constants further down.
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; // 4x
+                   const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; // 4y
+                   const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; // 4x + 4y - 3: both partial derivatives of tmp_qloop_51.
+                   const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; // 4xy
+                   const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; // 2x^2 - x
+                   const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; // 2y^2 - y
+                   const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; // 4y - 4xy - 4y^2
+                   const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; // 4x - 4xy - 4x^2
+                   const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; // 4xy + 2x^2 + 2y^2 - 3x - 3y + 1. These six quadratics look like the P2 nodal basis on the reference triangle — TODO confirm against the generator.
+                   const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; // The six ux dofs combined with the six quadratics.
+                   const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; // Same combination for the uy dofs.
+                   const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0; // 4x - 1: x-derivative of tmp_qloop_45.
+                   const real_t tmp_qloop_58 = jac_affine_inv_0_0_GRAY*tmp_qloop_57;
+                   const real_t tmp_qloop_59 = jac_affine_inv_0_1_GRAY*tmp_qloop_57;
+                   const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0; // 4y - 1: y-derivative of tmp_qloop_48.
+                   const real_t tmp_qloop_62 = jac_affine_inv_1_0_GRAY*tmp_qloop_61;
+                   const real_t tmp_qloop_63 = jac_affine_inv_1_1_GRAY*tmp_qloop_61;
+                   const real_t tmp_qloop_65 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_66 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                   const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66; // Derivatives of 4xy (4y, 4x) combined with jac_affine_inv_*_0_GRAY.
+                   const real_t tmp_qloop_68 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_69 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                   const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                   const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; // -4x - 8y + 4: y-derivative of tmp_qloop_49.
+                   const real_t tmp_qloop_73 = jac_affine_inv_1_0_GRAY*tmp_qloop_72 - tmp_qloop_66;
+                   const real_t tmp_qloop_74 = jac_affine_inv_1_1_GRAY*tmp_qloop_72 - tmp_qloop_69;
+                   const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; // -4y - 8x + 4: x-derivative of tmp_qloop_50.
+                   const real_t tmp_qloop_77 = jac_affine_inv_0_0_GRAY*tmp_qloop_76 - tmp_qloop_65;
+                   const real_t tmp_qloop_78 = jac_affine_inv_0_1_GRAY*tmp_qloop_76 - tmp_qloop_68;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; // 2x2 jac_blending matrix evaluated at (tmp_qloop_0, tmp_qloop_3).
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; // Determinant of jac_blending.
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21; // NOTE(review): named abs_det_* but holds the signed determinant (no abs applied) — confirm the sign is guaranteed non-negative upstream.
+                   const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q]; // Both determinants times the cp dofs combined with the six quadratics, times w.
+                   const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55; // tmp_qloop_56 and tmp_qloop_80 .. tmp_qloop_84: one quadratic each, scaled by tmp_qloop_55.
+                   const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55;
+                   const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55;
+                   const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55;
+                   const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55;
+                   const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55;
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; // Inverse of the 2x2 jac_blending: swapped diagonal, negated off-diagonal, divided by the determinant.
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); // tmp_qloop_54/60/64/71/75/79: (tmp_qloop_52, tmp_qloop_53) dotted with one derivative pair mapped through jac_blending_inv.
+                   const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59);
+                   const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63);
+                   const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70);
+                   const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74);
+                   const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78);
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; // NOTE(review): the eight hessian_blending_* constants are not read anywhere later in this loop body; they and tmp_qloop_23 .. tmp_qloop_36 look like dead generator output.
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56; // q_tmp_<i>_<j>: row factor i from {56, 80, 81, 82, 83, 84} times column factor j from {54, 60, 64, 71, 75, 79}.
+                   const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60;
+                   const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64;
+                   const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71;
+                   const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75;
+                   const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79;
+                   const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80;
+                   const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80;
+                   const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80;
+                   const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80;
+                   const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80;
+                   const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80;
+                   const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81;
+                   const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81;
+                   const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81;
+                   const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81;
+                   const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81;
+                   const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81;
+                   const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82;
+                   const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82;
+                   const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82;
+                   const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82;
+                   const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82;
+                   const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82;
+                   const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83;
+                   const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83;
+                   const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83;
+                   const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83;
+                   const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83;
+                   const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83;
+                   const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84;
+                   const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84;
+                   const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84;
+                   const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84;
+                   const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84;
+                   const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // Add this iteration's 36 contributions to the accumulators.
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; // elMatVec_<i>: row i of the accumulated 6x6 values times the six src dofs.
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // Read-modify-write: each elMatVec_<i> is added to dst at the same index src_dof_<i> was read from.
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // BLUE setup: step h = 1/micro_edges_per_macro_edge_float, reused in every offset below.
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // Component 0 of macro vertex 0 moved by h toward macro vertex 1.
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // Component 1 of the same point.
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // h * (vertex 2 - vertex 0), component 0.
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // h * (vertex 2 - vertex 0), component 1.
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // Point 0: vertex 0 + h*(vertex 1 - vertex 0).
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // Point 1: vertex 0 + h*(vertex 2 - vertex 0).
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // Point 2: point 0 shifted by h*(vertex 2 - vertex 0).
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // jac_affine_<r>_<c>_BLUE: column 0 holds (point 1 - point 0), column 1 holds (point 2 - point 0); the row index is the coordinate component.
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // Determinant of the 2x2 jac_affine_*_BLUE matrix.
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // NOTE(review): no guard against a zero determinant — presumably a degenerate macro triangle is excluded by the caller, confirm.
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // 2x2 inverse: swapped diagonal, negated off-diagonal, divided by the determinant.
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t — confirm a floating-point overload (as std::abs provides) is the one selected here, not int abs(int).
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38);
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0));
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(tmp_qloop_65,tmp_qloop_66);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(tmp_qloop_68,tmp_qloop_69);
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)));
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_55);
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_55);
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_55);
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_55);
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_55);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41))));
+                   const __m256d tmp_qloop_60 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_58),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_59))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_58),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_59))));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_62),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_63))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_62),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_63))));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_67),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_70))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_67),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_70))));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_73),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_74))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_73),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_74))));
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_77),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_78))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_77),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_78))));
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_56);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_60);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_64);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_71);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_75);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_79);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_80);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_80);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_80);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_80);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_80);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_80);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_81);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_81);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_81);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_81);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_81);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_81);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_82);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_82);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_82);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_82);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_82);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_82);
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_83);
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_83);
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_83);
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_83);
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_83);
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_83);
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_84);
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_84);
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_84);
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_84);
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_84);
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_84);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                   const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                   const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                   const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                   const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                   const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0;
+                   const real_t tmp_qloop_58 = jac_affine_inv_0_0_BLUE*tmp_qloop_57;
+                   const real_t tmp_qloop_59 = jac_affine_inv_0_1_BLUE*tmp_qloop_57;
+                   const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0;
+                   const real_t tmp_qloop_62 = jac_affine_inv_1_0_BLUE*tmp_qloop_61;
+                   const real_t tmp_qloop_63 = jac_affine_inv_1_1_BLUE*tmp_qloop_61;
+                   const real_t tmp_qloop_65 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_66 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                   const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                   const real_t tmp_qloop_68 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_69 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                   const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                   const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_73 = jac_affine_inv_1_0_BLUE*tmp_qloop_72 - tmp_qloop_66;
+                   const real_t tmp_qloop_74 = jac_affine_inv_1_1_BLUE*tmp_qloop_72 - tmp_qloop_69;
+                   const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_77 = jac_affine_inv_0_0_BLUE*tmp_qloop_76 - tmp_qloop_65;
+                   const real_t tmp_qloop_78 = jac_affine_inv_0_1_BLUE*tmp_qloop_76 - tmp_qloop_68;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21;
+                   const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q];
+                   const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55;
+                   const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55;
+                   const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55;
+                   const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55;
+                   const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55;
+                   const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55;
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                   const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59);
+                   const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63);
+                   const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70);
+                   const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74);
+                   const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78);
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56;
+                   const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60;
+                   const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64;
+                   const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71;
+                   const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75;
+                   const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79;
+                   const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80;
+                   const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80;
+                   const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80;
+                   const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80;
+                   const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80;
+                   const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80;
+                   const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81;
+                   const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81;
+                   const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81;
+                   const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81;
+                   const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81;
+                   const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81;
+                   const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82;
+                   const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82;
+                   const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82;
+                   const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82;
+                   const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82;
+                   const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82;
+                   const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83;
+                   const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83;
+                   const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83;
+                   const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83;
+                   const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83;
+                   const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83;
+                   const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84;
+                   const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84;
+                   const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84;
+                   const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84;
+                   const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84;
+                   const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp b/operators/advection/avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cbfd3a92a612ab010deca9a0232f1d27fe0989e1
--- /dev/null
+++ b/operators/advection/avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,761 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseAdvectionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,ux_dof_3),_mm256_mul_pd(tmp_qloop_43,ux_dof_0)),_mm256_mul_pd(tmp_qloop_48,ux_dof_1)),_mm256_mul_pd(tmp_qloop_49,ux_dof_2)),_mm256_mul_pd(tmp_qloop_50,ux_dof_4)),_mm256_mul_pd(tmp_qloop_51,ux_dof_5));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,uy_dof_3),_mm256_mul_pd(tmp_qloop_43,uy_dof_0)),_mm256_mul_pd(tmp_qloop_48,uy_dof_1)),_mm256_mul_pd(tmp_qloop_49,uy_dof_2)),_mm256_mul_pd(tmp_qloop_50,uy_dof_4)),_mm256_mul_pd(tmp_qloop_51,uy_dof_5));
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62);
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(tmp_qloop_64,tmp_qloop_65);
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_43),_mm256_mul_pd(cp_dof_1,tmp_qloop_48)),_mm256_mul_pd(cp_dof_2,tmp_qloop_49)),_mm256_mul_pd(cp_dof_3,tmp_qloop_38)),_mm256_mul_pd(cp_dof_4,tmp_qloop_50)),_mm256_mul_pd(cp_dof_5,tmp_qloop_51))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_46),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_47))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_46),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_47)))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60)))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66)))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_51,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0; // elMatDiag_i: plain copy of accumulator q_acc_i_i (4 double lanes each)
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); // read-modify-write: unaligned load of 4 doubles, add elMatDiag_0, unaligned store back to the same index
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); // same index as the previous store shifted by +1, so the two 4-wide windows overlap; statement order matters
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); // index formula evaluated with ctr_1 + 1 in place of ctr_1
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); // entries 3..5 target _data_invDiag_Edge; this one is offset by 1 * (m*(m+1))/2 with m = micro_edges_per_macro_edge
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); // offset by 2 * (m*(m+1))/2
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); // no additional offset
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // 4 quadrature points; each pass adds one q_tmp_i_i term to every q_acc_i_i
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; // component 0 of the p_affine_* points combined with weights _data_q_p_0[q], _data_q_p_1[q]
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; // same combination for component 1
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // squared Euclidean norm of (tmp_qloop_0, tmp_qloop_3)
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); // NOTE(review): no guard against tmp_qloop_5 == 0 here — presumably excluded by the geometry, confirm
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; // tmp_qloop_12 (and tmp_qloop_1, 7..11 used below) is not defined inside this loop body
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; // from here to tmp_qloop_51: polynomials in _data_q_p_0[q], _data_q_p_1[q] only
+                   const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_40 = tmp_qloop_39*2.0;
+                   const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_42 = tmp_qloop_41*2.0;
+                   const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_37 + tmp_qloop_44 - 3.0;
+                   const real_t tmp_qloop_46 = jac_affine_inv_0_0_GRAY*tmp_qloop_45 + jac_affine_inv_1_0_GRAY*tmp_qloop_45;
+                   const real_t tmp_qloop_47 = jac_affine_inv_0_1_GRAY*tmp_qloop_45 + jac_affine_inv_1_1_GRAY*tmp_qloop_45;
+                   const real_t tmp_qloop_48 = tmp_qloop_40 - _data_q_p_0[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_42 - _data_q_p_1[q];
+                   const real_t tmp_qloop_50 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
+                   const real_t tmp_qloop_51 = tmp_qloop_37 - tmp_qloop_38 + tmp_qloop_39*-4.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_38*ux_dof_3 + tmp_qloop_43*ux_dof_0 + tmp_qloop_48*ux_dof_1 + tmp_qloop_49*ux_dof_2 + tmp_qloop_50*ux_dof_4 + tmp_qloop_51*ux_dof_5; // weighted sum of the six ux_dof_* values
+                   const real_t tmp_qloop_53 = tmp_qloop_38*uy_dof_3 + tmp_qloop_43*uy_dof_0 + tmp_qloop_48*uy_dof_1 + tmp_qloop_49*uy_dof_2 + tmp_qloop_50*uy_dof_4 + tmp_qloop_51*uy_dof_5; // same weights applied to the six uy_dof_* values
+                   const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0;
+                   const real_t tmp_qloop_56 = jac_affine_inv_0_0_GRAY*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = jac_affine_inv_0_1_GRAY*tmp_qloop_55;
+                   const real_t tmp_qloop_58 = tmp_qloop_44 - 1.0;
+                   const real_t tmp_qloop_59 = jac_affine_inv_1_0_GRAY*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = jac_affine_inv_1_1_GRAY*tmp_qloop_58;
+                   const real_t tmp_qloop_61 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_62 = jac_affine_inv_0_0_GRAY*tmp_qloop_44;
+                   const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                   const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_65 = jac_affine_inv_0_1_GRAY*tmp_qloop_44;
+                   const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65;
+                   const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_68 = jac_affine_inv_1_0_GRAY*tmp_qloop_67 - tmp_qloop_62;
+                   const real_t tmp_qloop_69 = jac_affine_inv_1_1_GRAY*tmp_qloop_67 - tmp_qloop_65;
+                   const real_t tmp_qloop_70 = -tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_71 = jac_affine_inv_0_0_GRAY*tmp_qloop_70 - tmp_qloop_61;
+                   const real_t tmp_qloop_72 = jac_affine_inv_0_1_GRAY*tmp_qloop_70 - tmp_qloop_64;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; // 2x2 determinant of jac_blending_*
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); // reciprocal determinant; no check for tmp_qloop_21 == 0
+                   const real_t abs_det_jac_blending = tmp_qloop_21; // NOTE(review): assigned as-is, no absolute value applied despite the name — confirm the sign is guaranteed
+                   const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_43 + cp_dof_1*tmp_qloop_48 + cp_dof_2*tmp_qloop_49 + cp_dof_3*tmp_qloop_38 + cp_dof_4*tmp_qloop_50 + cp_dof_5*tmp_qloop_51)*_data_q_w[q]; // common factor of every q_tmp_i_i: both determinants, weighted cp_dof_* sum, and _data_q_w[q]
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; // jac_blending_inv_*: 2x2 inverse built as adjugate times 1/det
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; // hessian_blending_* (this line through hessian_blending_1_1_1) are not read by any later statement in this loop
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_46 + jac_blending_inv_1_0*tmp_qloop_47) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_46 + jac_blending_inv_1_1*tmp_qloop_47)); // all six q_tmp_i_i share one shape: weight_i * tmp_qloop_54 * (tmp_qloop_52 * a_i + tmp_qloop_53 * b_i)
+                   const real_t q_tmp_1_1 = tmp_qloop_48*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57));
+                   const real_t q_tmp_2_2 = tmp_qloop_49*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60));
+                   const real_t q_tmp_3_3 = tmp_qloop_38*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66));
+                   const real_t q_tmp_4_4 = tmp_qloop_50*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69));
+                   const real_t q_tmp_5_5 = tmp_qloop_51*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // running sums over q
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,ux_dof_3),_mm256_mul_pd(tmp_qloop_43,ux_dof_0)),_mm256_mul_pd(tmp_qloop_48,ux_dof_1)),_mm256_mul_pd(tmp_qloop_49,ux_dof_2)),_mm256_mul_pd(tmp_qloop_50,ux_dof_4)),_mm256_mul_pd(tmp_qloop_51,ux_dof_5));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,uy_dof_3),_mm256_mul_pd(tmp_qloop_43,uy_dof_0)),_mm256_mul_pd(tmp_qloop_48,uy_dof_1)),_mm256_mul_pd(tmp_qloop_49,uy_dof_2)),_mm256_mul_pd(tmp_qloop_50,uy_dof_4)),_mm256_mul_pd(tmp_qloop_51,uy_dof_5));
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62);
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(tmp_qloop_64,tmp_qloop_65);
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_43),_mm256_mul_pd(cp_dof_1,tmp_qloop_48)),_mm256_mul_pd(cp_dof_2,tmp_qloop_49)),_mm256_mul_pd(cp_dof_3,tmp_qloop_38)),_mm256_mul_pd(cp_dof_4,tmp_qloop_50)),_mm256_mul_pd(cp_dof_5,tmp_qloop_51))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_46),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_47))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_46),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_47)))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60)))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66)))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_51,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_40 = tmp_qloop_39*2.0;
+                   const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_42 = tmp_qloop_41*2.0;
+                   const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_37 + tmp_qloop_44 - 3.0;
+                   const real_t tmp_qloop_46 = jac_affine_inv_0_0_BLUE*tmp_qloop_45 + jac_affine_inv_1_0_BLUE*tmp_qloop_45;
+                   const real_t tmp_qloop_47 = jac_affine_inv_0_1_BLUE*tmp_qloop_45 + jac_affine_inv_1_1_BLUE*tmp_qloop_45;
+                   const real_t tmp_qloop_48 = tmp_qloop_40 - _data_q_p_0[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_42 - _data_q_p_1[q];
+                   const real_t tmp_qloop_50 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
+                   const real_t tmp_qloop_51 = tmp_qloop_37 - tmp_qloop_38 + tmp_qloop_39*-4.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_38*ux_dof_3 + tmp_qloop_43*ux_dof_0 + tmp_qloop_48*ux_dof_1 + tmp_qloop_49*ux_dof_2 + tmp_qloop_50*ux_dof_4 + tmp_qloop_51*ux_dof_5;
+                   const real_t tmp_qloop_53 = tmp_qloop_38*uy_dof_3 + tmp_qloop_43*uy_dof_0 + tmp_qloop_48*uy_dof_1 + tmp_qloop_49*uy_dof_2 + tmp_qloop_50*uy_dof_4 + tmp_qloop_51*uy_dof_5;
+                   const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0;
+                   const real_t tmp_qloop_56 = jac_affine_inv_0_0_BLUE*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = jac_affine_inv_0_1_BLUE*tmp_qloop_55;
+                   const real_t tmp_qloop_58 = tmp_qloop_44 - 1.0;
+                   const real_t tmp_qloop_59 = jac_affine_inv_1_0_BLUE*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = jac_affine_inv_1_1_BLUE*tmp_qloop_58;
+                   const real_t tmp_qloop_61 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_62 = jac_affine_inv_0_0_BLUE*tmp_qloop_44;
+                   const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                   const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_65 = jac_affine_inv_0_1_BLUE*tmp_qloop_44;
+                   const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65;
+                   const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_68 = jac_affine_inv_1_0_BLUE*tmp_qloop_67 - tmp_qloop_62;
+                   const real_t tmp_qloop_69 = jac_affine_inv_1_1_BLUE*tmp_qloop_67 - tmp_qloop_65;
+                   const real_t tmp_qloop_70 = -tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_71 = jac_affine_inv_0_0_BLUE*tmp_qloop_70 - tmp_qloop_61;
+                   const real_t tmp_qloop_72 = jac_affine_inv_0_1_BLUE*tmp_qloop_70 - tmp_qloop_64;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21;
+                   const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_43 + cp_dof_1*tmp_qloop_48 + cp_dof_2*tmp_qloop_49 + cp_dof_3*tmp_qloop_38 + cp_dof_4*tmp_qloop_50 + cp_dof_5*tmp_qloop_51)*_data_q_w[q];
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_46 + jac_blending_inv_1_0*tmp_qloop_47) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_46 + jac_blending_inv_1_1*tmp_qloop_47));
+                   const real_t q_tmp_1_1 = tmp_qloop_48*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57));
+                   const real_t q_tmp_2_2 = tmp_qloop_49*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60));
+                   const real_t q_tmp_3_3 = tmp_qloop_38*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66));
+                   const real_t q_tmp_4_4 = tmp_qloop_50*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69));
+                   const real_t q_tmp_5_5 = tmp_qloop_51*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6e8fc0c2a4da287a0792bfbc8bf673818d9a9c24
--- /dev/null
+++ b/operators/advection/avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp
@@ -0,0 +1,948 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseAdvection::apply_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1);
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8);
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2));
+                   const __m256d tmp_qloop_15 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))));
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0);
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_18),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_18),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1);
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_20),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_20),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_22,tmp_qloop_23)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25)));
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))));
+                   const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_6);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_9);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_16);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_16);
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_17);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_19);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_21);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_26);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_28);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_30);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_31);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_31);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_31);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_31);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_31);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_32);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_32);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_32);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_32);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_32);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_32);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_33);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_33);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_33);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_33);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_33);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_33);
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_34);
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_34);
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_34);
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34);
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_34);
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_34);
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_35);
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_35);
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_35);
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_35);
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_35);
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_35);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // Scalar tail of row ctr_1: starts at (micro_edges_per_macro_edge - ctr_1) rounded down to a multiple of 4 and handles the leftover elements one at a time.
+             {
+            // Same per-element kernel as the __m256d loop above, in plain real_t arithmetic: load DoFs, integrate a 6x6 element matrix over 4 quadrature points, apply it to src, add into dst.
+                const int64_t phantom_ctr_0 = ctr_0; // ctr_0 is not modified in the body, so (ctr_0 - phantom_ctr_0) below is always 0.
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; // only entry [0] is read in this scalar loop; [1..3] are filled but not used here
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; // row index as real_t, replicated into all four slots
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            // p_affine_k_c: component c of corner k of the micro triangle, i.e. v0 + (i/n)*(v1 - v0) + (j/n)*(v2 - v0) with (i,j) = (ctr_0,ctr_1), (ctr_0+1,ctr_1), (ctr_0,ctr_1+1). NOTE(review): none of the p_affine_* values is referenced again inside this loop body.
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // DoFs 0..2: vertex array at (ctr_0, ctr_1), (ctr_0 + 1, ctr_1), (ctr_0, ctr_1 + 1) in the triangular row layout
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; // DoFs 3..5: edge array at the same (ctr_0, ctr_1) base index, offset by 1, 2 and 0 times n*(n+1)/2 with n = micro_edges_per_macro_edge
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // cp: scalar coefficient field, read with the same six indices as src
+                const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // ux: first field paired with jac_affine_inv_*_0 below (presumably the x velocity component - confirm against the form definition)
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // uy: second field paired with jac_affine_inv_*_1 below (presumably the y velocity component - confirm)
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // q_acc_i_j accumulates entry (i, j) of the 6x6 element matrix over the quadrature points
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // 4 quadrature points: coordinates _data_q_p_0[q], _data_q_p_1[q], weight _data_q_w[q]
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; // 4x, with x = _data_q_p_0[q]
+                   const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; // 4y, with y = _data_q_p_1[q]
+                   const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; // 4x + 4y - 3
+                   const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; // 4xy (weight of DoF 3)
+                   const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; // 2x^2 - x (weight of DoF 1)
+                   const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; // 2y^2 - y (weight of DoF 2)
+                   const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; // 4y - 4xy - 4y^2 (weight of DoF 4)
+                   const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; // 4x - 4xy - 4x^2 (weight of DoF 5)
+                   const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; // 2x^2 + 4xy + 2y^2 - 3x - 3y + 1 (weight of DoF 0); the six weights look like the quadratic (P2) triangle shape functions - confirm against the form
+                   const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; // ux evaluated at the quadrature point (weighted sum of its six DoFs)
+                   const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; // uy evaluated at the quadrature point
+                   const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2); // column factor j = 0: (ux, uy) combined with derivative terms of weight 0 through the inverse Jacobian
+                   const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q]; // |det J| * cp at the quadrature point * quadrature weight
+                   const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16; // row factor i = 0
+                   const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0;
+                   const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_18; // column factor j = 1
+                   const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0;
+                   const real_t tmp_qloop_21 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_20; // column factor j = 2
+                   const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_23 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                   const real_t tmp_qloop_24 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_25 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25); // column factor j = 3
+                   const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_27 - tmp_qloop_25); // column factor j = 4
+                   const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_29 - tmp_qloop_24); // column factor j = 5
+                   const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6; // row factors i = 1..5 follow: tmp_qloop_16 times the weight of DoF i
+                   const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9;
+                   const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16;
+                   const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16;
+                   const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17; // q_tmp_i_j = (row factor i) * (column factor j): this point's contribution to matrix entry (i, j)
+                   const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19;
+                   const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21;
+                   const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26;
+                   const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28;
+                   const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30;
+                   const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31;
+                   const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31;
+                   const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31;
+                   const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31;
+                   const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31;
+                   const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31;
+                   const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32;
+                   const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32;
+                   const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32;
+                   const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32;
+                   const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32;
+                   const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32;
+                   const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33;
+                   const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33;
+                   const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33;
+                   const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33;
+                   const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33;
+                   const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33;
+                   const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34;
+                   const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34;
+                   const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34;
+                   const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34;
+                   const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34;
+                   const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34;
+                   const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35;
+                   const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35;
+                   const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35;
+                   const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35;
+                   const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35;
+                   const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // add this quadrature point's contribution to every matrix entry
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; // elMatVec_i = row i of the element matrix times the six src DoFs
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // read-add-write: results are added to dst at the same indices the src DoFs were read from
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // h = 1 / micro_edges_per_macro_edge_float; start of the BLUE reference-triangle geometry (v0, v1, v2 = macro_vertex_coord_id_0/1/2)
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // v0 + h*(v1 - v0), component 0
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // v0 + h*(v1 - v0), component 1
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // h*(v2 - v0), component 0
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // h*(v2 - v0), component 1
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // corner 0 = v0 + h*(v1 - v0)
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // corner 1 = v0 + h*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // corner 2 = v0 + h*(v1 - v0) + h*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // Jacobian columns are the edge vectors (corner 1 - corner 0) and (corner 2 - corner 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // NOTE(review): no guard against a zero determinant (degenerate macro triangle) - confirm callers rule this out
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // 2x2 inverse via the adjugate: [[d, -b], [-c, a]] / det
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t - confirm the floating-point overload (std::abs / fabs) is selected here, not int abs(int)
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1) // Quadrature loop over 4 points; each pass adds one point's products to the 36 q_acc_i_j accumulators. Below, x = _data_q_p_0[q] and y = _data_q_p_1[q], each copied into all four lanes.
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // 4*x
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*y
+                   const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); // 4*x + 4*y - 3
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*x*y
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // x*x
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2*x*x
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); // 2*x*x - x
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // y*y
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2*y*y
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); // 2*y*y - y
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); // 4*y - 4*x*y - 4*y*y
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); // 4*x - 4*x*y - 4*x*x
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); // 1 - 3*x - 3*y + 4*x*y + 2*x*x + 2*y*y
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2)); // "ux": weighted sum of ux_dof_0..5 with weights tmp_qloop_12, _6, _9, _3, _10, _11. NOTE(review): the weights look like the six quadratic (P2) shape functions on the reference triangle - confirm against the generator
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2)); // "uy": the same weighted sum applied to uy_dof_0..5
+                   const __m256d tmp_qloop_15 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); // column factor 0: ux*(t2*J00 + t2*J10) + uy*(t2*J01 + t2*J11), where t2 = tmp_qloop_2 and Jab abbreviates jac_affine_inv_a_b_BLUE (also in the comments below)
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); // (same weighted sum applied to cp_dof_0..5) * _data_q_w[q] * abs_det_jac_affine_BLUE
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_16); // row factor 0: tmp_qloop_12 * tmp_qloop_16
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); // 4*x - 1
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_18),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_18),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); // column factor 1: ux*(4x-1)*J00 + uy*(4x-1)*J01
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); // 4*y - 1
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_20),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_20),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); // column factor 2: ux*(4y-1)*J10 + uy*(4y-1)*J11
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); // 4*x*J10
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); // 4*y*J00
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); // 4*x*J11
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); // 4*y*J01
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_22,tmp_qloop_23)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25))); // column factor 3: ux*(4x*J10 + 4y*J00) + uy*(4x*J11 + 4y*J01)
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 4*x - 8*y
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); // column factor 4: ux*((4-4x-8y)*J10 - 4y*J00) + uy*((4-4x-8y)*J11 - 4y*J01)
+                   const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 8*x - 4*y
+                   const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))); // column factor 5: ux*((4-8x-4y)*J00 - 4x*J10) + uy*((4-8x-4y)*J01 - 4x*J11)
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_6); // row factor 1: tmp_qloop_16 * (2*x*x - x)
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_9); // row factor 2: tmp_qloop_16 * (2*y*y - y)
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_3); // row factor 3: tmp_qloop_16 * (4*x*y)
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_16); // row factor 4: tmp_qloop_16 * tmp_qloop_10
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_16); // row factor 5: tmp_qloop_16 * tmp_qloop_11
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_17); // q_tmp_i_j = (row factor i) * (column factor j); row factors are tmp_qloop_17, _31, _32, _33, _34, _35 and column factors are tmp_qloop_15, _19, _21, _26, _28, _30
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_19);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_21);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_26);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_28);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_30);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_31);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_31);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_31);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_31);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_31);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_32);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_32);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_32);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_32);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_32);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_32);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_33);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_33);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_33);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_33);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_33);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_33);
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_34);
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_34);
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_34);
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34);
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_34);
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_34);
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_35);
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_35);
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_35);
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_35);
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_35);
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_35);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); // add this point's 36 products onto the running sums q_acc_i_j
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                   const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                   const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                   const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                   const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                   const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                   const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2);
+                   const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q];
+                   const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0;
+                   const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_18;
+                   const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0;
+                   const real_t tmp_qloop_21 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_20;
+                   const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                   const real_t tmp_qloop_24 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_25 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25);
+                   const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_27 - tmp_qloop_25);
+                   const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_29 - tmp_qloop_24);
+                   const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6;
+                   const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9;
+                   const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16;
+                   const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16;
+                   const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17;
+                   const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19;
+                   const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21;
+                   const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26;
+                   const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28;
+                   const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30;
+                   const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31;
+                   const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31;
+                   const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31;
+                   const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31;
+                   const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31;
+                   const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31;
+                   const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32;
+                   const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32;
+                   const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32;
+                   const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32;
+                   const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32;
+                   const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32;
+                   const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33;
+                   const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33;
+                   const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33;
+                   const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33;
+                   const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33;
+                   const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33;
+                   const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34;
+                   const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34;
+                   const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34;
+                   const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34;
+                   const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34;
+                   const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34;
+                   const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35;
+                   const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35;
+                   const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35;
+                   const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35;
+                   const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35;
+                   const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e93c26e5ed0553b97a1a0f40291f94a164b48e83
--- /dev/null
+++ b/operators/advection/avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp
@@ -0,0 +1,516 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseAdvection::computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5);
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0);
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,ux_dof_3),_mm256_mul_pd(tmp_qloop_10,ux_dof_2)),_mm256_mul_pd(tmp_qloop_11,ux_dof_4)),_mm256_mul_pd(tmp_qloop_12,ux_dof_5)),_mm256_mul_pd(tmp_qloop_6,ux_dof_0)),_mm256_mul_pd(tmp_qloop_9,ux_dof_1));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,uy_dof_3),_mm256_mul_pd(tmp_qloop_10,uy_dof_2)),_mm256_mul_pd(tmp_qloop_11,uy_dof_4)),_mm256_mul_pd(tmp_qloop_12,uy_dof_5)),_mm256_mul_pd(tmp_qloop_6,uy_dof_0)),_mm256_mul_pd(tmp_qloop_9,uy_dof_1));
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_6),_mm256_mul_pd(cp_dof_1,tmp_qloop_9)),_mm256_mul_pd(cp_dof_2,tmp_qloop_10)),_mm256_mul_pd(cp_dof_3,tmp_qloop_1)),_mm256_mul_pd(cp_dof_4,tmp_qloop_11)),_mm256_mul_pd(cp_dof_5,tmp_qloop_12)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0);
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_6),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_9),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_10,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_1,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_11,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_12,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // scalar loop over ctr_0 from ((-ctr_1 + micro_edges_per_macro_edge) / 4) * 4 up to -ctr_1 + micro_edges_per_macro_edge, one element per iteration; presumably the leftover iterations of the 4-wide AVX loop above - TODO confirm against that loop's header
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0; // so ctr_0 - phantom_ctr_0 == 0 in every index expression below
+                real_t _data_float_loop_ctr_array_dim_0[4]; // filled with ctr_0 + 0..3 as real_t; only element [0] is read in this loop
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4]; // filled with ctr_1 as real_t in all four slots
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // p_affine_<i>_<c>: macro vertex 0 (component c) + ctr_0/N * (vertex 1 - vertex 0) + ctr_1/N * (vertex 2 - vertex 0), with N = micro_edges_per_macro_edge_float
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // i = 1: same formula with ctr_0 + 1 in the first term
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; // i = 2: same formula with ctr_1 + 1 in the second term
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; // NOTE(review): none of the p_affine_* values are read again inside this loop body
+                const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // cp_dof_0..2 are read from _data_cpVertex, cp_dof_3..5 from _data_cpEdge; the ux_* and uy_* loads below use the same index expressions
+                const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // one accumulator per local dof, summed over q in the loop below
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // 4 iterations over the entries of _data_q_p_0, _data_q_p_1 and _data_q_w (quadrature points and weights, judging by the names)
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; // below, x = _data_q_p_0[q] and y = _data_q_p_1[q]; this is 4x
+                   const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; // 4xy
+                   const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; // 2x^2 + 4xy + 2y^2 - 3x - 3y + 1
+                   const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q]; // 4y
+                   const real_t tmp_qloop_8 = tmp_qloop_0 + tmp_qloop_7 - 3.0; // 4x + 4y - 3
+                   const real_t tmp_qloop_9 = tmp_qloop_3 - _data_q_p_0[q]; // 2x^2 - x
+                   const real_t tmp_qloop_10 = tmp_qloop_5 - _data_q_p_1[q]; // 2y^2 - y
+                   const real_t tmp_qloop_11 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; // 4y - 4xy - 4y^2
+                   const real_t tmp_qloop_12 = tmp_qloop_0 - tmp_qloop_1 + tmp_qloop_2*-4.0; // 4x - 4xy - 4x^2
+                   const real_t tmp_qloop_13 = tmp_qloop_1*ux_dof_3 + tmp_qloop_10*ux_dof_2 + tmp_qloop_11*ux_dof_4 + tmp_qloop_12*ux_dof_5 + tmp_qloop_6*ux_dof_0 + tmp_qloop_9*ux_dof_1; // linear combination of the six ux values with the polynomials tmp_qloop_6, 9, 10, 1, 11, 12
+                   const real_t tmp_qloop_14 = tmp_qloop_1*uy_dof_3 + tmp_qloop_10*uy_dof_2 + tmp_qloop_11*uy_dof_4 + tmp_qloop_12*uy_dof_5 + tmp_qloop_6*uy_dof_0 + tmp_qloop_9*uy_dof_1; // same combination for the six uy values
+                   const real_t tmp_qloop_15 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_6 + cp_dof_1*tmp_qloop_9 + cp_dof_2*tmp_qloop_10 + cp_dof_3*tmp_qloop_1 + cp_dof_4*tmp_qloop_11 + cp_dof_5*tmp_qloop_12)*_data_q_w[q]; // same combination for the six cp values, scaled by _data_q_w[q] and abs_det_jac_affine_GRAY
+                   const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; // 4x - 1
+                   const real_t tmp_qloop_17 = tmp_qloop_7 - 1.0; // 4y - 1
+                   const real_t tmp_qloop_18 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_7;
+                   const real_t tmp_qloop_20 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_21 = jac_affine_inv_0_1_GRAY*tmp_qloop_7;
+                   const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; // 4 - 4x - 8y
+                   const real_t tmp_qloop_23 = -tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0; // 4 - 8x - 4y
+                   const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_6*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_8 + jac_affine_inv_1_0_GRAY*tmp_qloop_8) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_8 + jac_affine_inv_1_1_GRAY*tmp_qloop_8)); // each q_tmp_i_i has the form tmp_qloop_15 * (i-th polynomial) * (tmp_qloop_13 * a_i + tmp_qloop_14 * b_i), where a_i and b_i are built from jac_affine_inv_*_GRAY
+                   const real_t q_tmp_1_1 = tmp_qloop_15*tmp_qloop_9*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16);
+                   const real_t q_tmp_2_2 = tmp_qloop_10*tmp_qloop_15*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17);
+                   const real_t q_tmp_3_3 = tmp_qloop_1*tmp_qloop_15*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21));
+                   const real_t q_tmp_4_4 = tmp_qloop_11*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21));
+                   const real_t q_tmp_5_5 = tmp_qloop_12*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // add this iteration's contribution to each accumulator
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0; // copies of the accumulators under the names used by the stores below
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // read-modify-write: elMatDiag_0..2 are added to existing _data_invDiag_Vertex entries and elMatDiag_3..5 to _data_invDiag_Edge entries, at the same index expressions as the cp/ux/uy loads
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / micro_edges_per_macro_edge_float; // 1/N, with N = micro_edges_per_macro_edge_float; below v0, v1, v2 denote macro_vertex_coord_id_0/1/2
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + (macro_vertex_coord_id_1comp0 - macro_vertex_coord_id_0comp0)*tmp_coords_jac_0_BLUE; // component 0 of v0 + (v1 - v0)/N
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + (macro_vertex_coord_id_1comp1 - macro_vertex_coord_id_0comp1)*tmp_coords_jac_0_BLUE; // component 1 of v0 + (v1 - v0)/N
+       const real_t tmp_coords_jac_3_BLUE = (macro_vertex_coord_id_2comp0 - macro_vertex_coord_id_0comp0)*tmp_coords_jac_0_BLUE; // component 0 of (v2 - v0)/N
+       const real_t tmp_coords_jac_4_BLUE = (macro_vertex_coord_id_2comp1 - macro_vertex_coord_id_0comp1)*tmp_coords_jac_0_BLUE; // component 1 of (v2 - v0)/N
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // triangle vertex 0 = v0 + (v1 - v0)/N
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; // (component 1 of vertex 0)
+       const real_t p_affine_const_1_0_BLUE = tmp_coords_jac_3_BLUE + macro_vertex_coord_id_0comp0; // triangle vertex 1 = v0 + (v2 - v0)/N
+       const real_t p_affine_const_1_1_BLUE = tmp_coords_jac_4_BLUE + macro_vertex_coord_id_0comp1; // (component 1 of vertex 1)
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_3_BLUE + tmp_coords_jac_1_BLUE; // triangle vertex 2 = vertex 0 + (v2 - v0)/N
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_4_BLUE + tmp_coords_jac_2_BLUE; // (component 1 of vertex 2)
+       const real_t jac_affine_0_0_BLUE = p_affine_const_1_0_BLUE - p_affine_const_0_0_BLUE; // Jacobian column 0 = vertex 1 - vertex 0
+       const real_t jac_affine_0_1_BLUE = p_affine_const_2_0_BLUE - p_affine_const_0_0_BLUE; // Jacobian column 1 = vertex 2 - vertex 0
+       const real_t jac_affine_1_0_BLUE = p_affine_const_1_1_BLUE - p_affine_const_0_1_BLUE; // (column 0, component 1)
+       const real_t jac_affine_1_1_BLUE = p_affine_const_2_1_BLUE - p_affine_const_0_1_BLUE; // (column 1, component 1)
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / tmp_coords_jac_5_BLUE; // reciprocal determinant; there is no guard against a zero determinant here
+       const real_t jac_affine_inv_0_0_BLUE = tmp_coords_jac_6_BLUE*jac_affine_1_1_BLUE; // inverse Jacobian = adjugate * (1/determinant)
+       const real_t jac_affine_inv_0_1_BLUE = -(jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE); // off-diagonal entries change sign
+       const real_t jac_affine_inv_1_0_BLUE = -(jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE); // off-diagonal entries change sign
+       const real_t jac_affine_inv_1_1_BLUE = tmp_coords_jac_6_BLUE*jac_affine_0_0_BLUE; // diagonal entries are swapped
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs - confirm it resolves to a floating-point overload in this translation unit
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) // AVX part of row ctr_1: every iteration handles 4 consecutive ctr_0 values, one per __m256d lane; the bound is (micro_edges_per_macro_edge - ctr_1 - 1) rounded down to a multiple of 4, the rest is left to the scalar loop that follows
+             {
+                // Floating-point copies of the loop counters, one entry per lane, so that they can be loaded into vector registers below.
+                const int64_t phantom_ctr_0 = ctr_0; // (ctr_0 - phantom_ctr_0) is always 0 in this iteration, so the loadu calls below read the two arrays from index 0
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; // the cast applies to ctr_0 only; the offset is added afterwards
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4]; // ctr_1 is the same for all 4 lanes
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                // p_affine_i_c: component c of vertex i, v0 + (v1 - v0)*(a/N) + (v2 - v0)*(b/N) with (a, b) = (ctr_0+1, ctr_1), (ctr_0, ctr_1+1), (ctr_0+1, ctr_1+1) for i = 0, 1, 2. NOTE(review): none of the p_affine_* values is read again in this loop body - presumably unused generator output; confirm before removing.
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); // unaligned load of 4 consecutive values; vertex array, row term for ctr_1, offset ctr_0 + 1
+                const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); // vertex array, row term for ctr_1 + 1, offset ctr_0
+                const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); // vertex array, row term for ctr_1 + 1, offset ctr_0 + 1
+                const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); // edge array, no block offset, row term for ctr_1 + 1
+                const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); // edge array, shifted by two blocks of N*(N+1)/2 entries, offset ctr_0 + 1
+                const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); // edge array, shifted by one block of N*(N+1)/2 entries
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); // ux_dof_0..5: same six index expressions as cp_dof_0..5, applied to the ux arrays
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); // uy_dof_0..5: same six index expressions again, applied to the uy arrays
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); // q_acc_i_i: running sum over q of the i-th diagonal contribution, all lanes start at zero
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1) // 4 passes; below x_q = _data_q_p_0[q], y_q = _data_q_p_1[q] (presumably quadrature point coordinates, with _data_q_w[q] the weight - the arrays are defined outside this chunk)
+                { // every q_tmp_i_i below has the form tmp_qloop_15 * phi_i * (tmp_qloop_13 * gx_i + tmp_qloop_14 * gy_i): phi_i is one of the six polynomials tmp_qloop_6/9/10/1/11/12, and (gx_i, gy_i) are its x_q/y_q derivatives combined with jac_affine_inv_*_BLUE
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // 4*x_q
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*x_q*y_q (phi_3)
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // x_q^2
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2*x_q^2
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // y_q^2
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2*y_q^2
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5); // 1 - 3*x_q - 3*y_q + 4*x_q*y_q + 2*x_q^2 + 2*y_q^2 (phi_0)
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*y_q
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7); // 4*x_q + 4*y_q - 3 (both derivatives of phi_0)
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3); // 2*x_q^2 - x_q (phi_1)
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5); // 2*y_q^2 - y_q (phi_2)
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); // 4*y_q - 4*y_q^2 - 4*x_q*y_q (phi_4)
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); // 4*x_q - 4*x_q^2 - 4*x_q*y_q (phi_5)
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,ux_dof_3),_mm256_mul_pd(tmp_qloop_10,ux_dof_2)),_mm256_mul_pd(tmp_qloop_11,ux_dof_4)),_mm256_mul_pd(tmp_qloop_12,ux_dof_5)),_mm256_mul_pd(tmp_qloop_6,ux_dof_0)),_mm256_mul_pd(tmp_qloop_9,ux_dof_1)); // sum over k of phi_k * ux_dof_k
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,uy_dof_3),_mm256_mul_pd(tmp_qloop_10,uy_dof_2)),_mm256_mul_pd(tmp_qloop_11,uy_dof_4)),_mm256_mul_pd(tmp_qloop_12,uy_dof_5)),_mm256_mul_pd(tmp_qloop_6,uy_dof_0)),_mm256_mul_pd(tmp_qloop_9,uy_dof_1)); // sum over k of phi_k * uy_dof_k
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_6),_mm256_mul_pd(cp_dof_1,tmp_qloop_9)),_mm256_mul_pd(cp_dof_2,tmp_qloop_10)),_mm256_mul_pd(cp_dof_3,tmp_qloop_1)),_mm256_mul_pd(cp_dof_4,tmp_qloop_11)),_mm256_mul_pd(cp_dof_5,tmp_qloop_12)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); // (sum over k of phi_k * cp_dof_k) * _data_q_w[q] * abs_det_jac_affine_BLUE
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); // 4*x_q - 1
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7); // 4*y_q - 1
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); // 4*x_q * jac_affine_inv_1_0_BLUE
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); // 4*y_q * jac_affine_inv_0_0_BLUE
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); // 4*x_q * jac_affine_inv_1_1_BLUE
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); // 4*y_q * jac_affine_inv_0_1_BLUE
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 8*y_q - 4*x_q (y_q derivative of phi_4)
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 8*x_q - 4*y_q (x_q derivative of phi_5)
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_6),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_9),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_10,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_1,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_11,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_12,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); // add this pass's contribution to the running sums
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0; // elMatDiag_i: finished per-lane diagonal value for local DoF i
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); // load-add-store into the same index cp_dof_0 was read from; NOTE(review): only additions into invDiag happen here, no inversion is visible in this loop
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); // same index as cp_dof_1
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); // same index as cp_dof_2; this 4-wide range overlaps the previous store by 3 elements, so the statement order must be kept
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); // same index as cp_dof_3
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); // same index as cp_dof_4
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); // same index as cp_dof_5
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = tmp_qloop_0 + tmp_qloop_7 - 3.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_3 - _data_q_p_0[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_5 - _data_q_p_1[q];
+                   const real_t tmp_qloop_11 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7;
+                   const real_t tmp_qloop_12 = tmp_qloop_0 - tmp_qloop_1 + tmp_qloop_2*-4.0;
+                   const real_t tmp_qloop_13 = tmp_qloop_1*ux_dof_3 + tmp_qloop_10*ux_dof_2 + tmp_qloop_11*ux_dof_4 + tmp_qloop_12*ux_dof_5 + tmp_qloop_6*ux_dof_0 + tmp_qloop_9*ux_dof_1;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*uy_dof_3 + tmp_qloop_10*uy_dof_2 + tmp_qloop_11*uy_dof_4 + tmp_qloop_12*uy_dof_5 + tmp_qloop_6*uy_dof_0 + tmp_qloop_9*uy_dof_1;
+                   const real_t tmp_qloop_15 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_6 + cp_dof_1*tmp_qloop_9 + cp_dof_2*tmp_qloop_10 + cp_dof_3*tmp_qloop_1 + cp_dof_4*tmp_qloop_11 + cp_dof_5*tmp_qloop_12)*_data_q_w[q];
+                   const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0;
+                   const real_t tmp_qloop_17 = tmp_qloop_7 - 1.0;
+                   const real_t tmp_qloop_18 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_7;
+                   const real_t tmp_qloop_20 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_21 = jac_affine_inv_0_1_BLUE*tmp_qloop_7;
+                   const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_23 = -tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_6*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_8 + jac_affine_inv_1_0_BLUE*tmp_qloop_8) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_8 + jac_affine_inv_1_1_BLUE*tmp_qloop_8));
+                   const real_t q_tmp_1_1 = tmp_qloop_15*tmp_qloop_9*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16);
+                   const real_t q_tmp_2_2 = tmp_qloop_10*tmp_qloop_15*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17);
+                   const real_t q_tmp_3_3 = tmp_qloop_1*tmp_qloop_15*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21));
+                   const real_t q_tmp_4_4 = tmp_qloop_11*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21));
+                   const real_t q_tmp_5_5 = tmp_qloop_12*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1fdbdb498081da0e27987c6f81e906e46e44fed4
--- /dev/null
+++ b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,661 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseAdvectionAnnulusMap::apply_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_58 = jac_affine_inv_0_0_GRAY*tmp_qloop_57;
+                const real_t tmp_qloop_59 = jac_affine_inv_0_1_GRAY*tmp_qloop_57;
+                const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_62 = jac_affine_inv_1_0_GRAY*tmp_qloop_61;
+                const real_t tmp_qloop_63 = jac_affine_inv_1_1_GRAY*tmp_qloop_61;
+                const real_t tmp_qloop_65 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_66 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_69 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_73 = jac_affine_inv_1_0_GRAY*tmp_qloop_72 - tmp_qloop_66;
+                const real_t tmp_qloop_74 = jac_affine_inv_1_1_GRAY*tmp_qloop_72 - tmp_qloop_69;
+                const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_77 = jac_affine_inv_0_0_GRAY*tmp_qloop_76 - tmp_qloop_65;
+                const real_t tmp_qloop_78 = jac_affine_inv_0_1_GRAY*tmp_qloop_76 - tmp_qloop_68;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55;
+                const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55;
+                const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55;
+                const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55;
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59);
+                const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63);
+                const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70);
+                const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74);
+                const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78);
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56;
+                const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60;
+                const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64;
+                const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71;
+                const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75;
+                const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79;
+                const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80;
+                const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80;
+                const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80;
+                const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80;
+                const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80;
+                const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80;
+                const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81;
+                const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81;
+                const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81;
+                const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81;
+                const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81;
+                const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81;
+                const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82;
+                const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82;
+                const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82;
+                const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82;
+                const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82;
+                const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82;
+                const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83;
+                const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83;
+                const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83;
+                const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83;
+                const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83;
+                const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83;
+                const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84;
+                const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84;
+                const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84;
+                const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84;
+                const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84;
+                const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // BLUE-element affine geometry, computed once before the BLUE loops: reciprocal of micro_edges_per_macro_edge_float, used as the scale factor below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // v0 + scale*(v1 - v0), component 0
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // v0 + scale*(v1 - v0), component 1
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // scale*(v2 - v0), component 0
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // scale*(v2 - v0), component 1
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = v0 + scale*(v1 - v0)
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = v0 + scale*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = v0 + scale*(v1 - v0) + scale*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 matrix whose first column is (point 1 - point 0) ...
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; // ... and whose second column is (point 2 - point 0)
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of jac_affine_*_BLUE
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal determinant; no guard against a zero determinant here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // 2x2 inverse: swapped diagonal, negated off-diagonal, times 1/det
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs; which overload is picked depends on headers/usings outside this view - confirm it is the floating-point one
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0; // copy of ctr_0, so the index expression ctr_0 - phantom_ctr_0 used below is always 0 in this loop
+             real_t _data_float_loop_ctr_array_dim_0[4]; // scratch array holding ctr_0 + {0,1,2,3} converted to real_t; the p_affine_* definitions below read only entry [ctr_0 - phantom_ctr_0]
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; // the cast applies to ctr_0 before the addition
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; // presumably entries 1..3 mirror a vectorized variant of this kernel - TODO confirm
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4]; // same scratch layout for ctr_1; all four entries hold the same value
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_58 = jac_affine_inv_0_0_BLUE*tmp_qloop_57;
+                const real_t tmp_qloop_59 = jac_affine_inv_0_1_BLUE*tmp_qloop_57;
+                const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_62 = jac_affine_inv_1_0_BLUE*tmp_qloop_61;
+                const real_t tmp_qloop_63 = jac_affine_inv_1_1_BLUE*tmp_qloop_61;
+                const real_t tmp_qloop_65 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_66 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_69 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_73 = jac_affine_inv_1_0_BLUE*tmp_qloop_72 - tmp_qloop_66;
+                const real_t tmp_qloop_74 = jac_affine_inv_1_1_BLUE*tmp_qloop_72 - tmp_qloop_69;
+                const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_77 = jac_affine_inv_0_0_BLUE*tmp_qloop_76 - tmp_qloop_65;
+                const real_t tmp_qloop_78 = jac_affine_inv_0_1_BLUE*tmp_qloop_76 - tmp_qloop_68;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; // 2x2 blending Jacobian at this quadrature point, assembled from tmp_qloop_* terms computed earlier in this iteration
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; // determinant of jac_blending
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); // reciprocal determinant; no guard against a zero determinant here
+                const real_t abs_det_jac_blending = tmp_qloop_21; // NOTE(review): named abs_det_* but assigned the signed determinant without abs() - confirm the determinant is guaranteed non-negative for this map
+                const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q]; // common factor: both determinants * (cp DoFs combined with the six quadratic polynomial values) * quadrature weight
+                const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55; // row factor of q_tmp_0_*
+                const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55; // row factor of q_tmp_1_*
+                const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55; // row factor of q_tmp_2_*
+                const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55; // row factor of q_tmp_3_*
+                const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55; // row factor of q_tmp_4_*
+                const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55; // row factor of q_tmp_5_*
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; // 2x2 inverse of jac_blending: swapped diagonal, negated off-diagonal, times 1/det
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); // column factor of q_tmp_*_0: (tmp_qloop_52, tmp_qloop_53) dotted with the pair (tmp_qloop_40, tmp_qloop_41) transformed by jac_blending_inv
+                const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59); // column factor of q_tmp_*_1, same form with (tmp_qloop_58, tmp_qloop_59)
+                const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63); // column factor of q_tmp_*_2, with (tmp_qloop_62, tmp_qloop_63)
+                const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70); // column factor of q_tmp_*_3, with (tmp_qloop_67, tmp_qloop_70)
+                const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74); // column factor of q_tmp_*_4, with (tmp_qloop_73, tmp_qloop_74)
+                const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78); // column factor of q_tmp_*_5, with (tmp_qloop_77, tmp_qloop_78)
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; // NOTE(review): none of the eight hessian_blending_* locals is read by any later statement in this loop body (q_tmp_* / q_acc_* below), and they go out of scope at the loop's closing brace
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; // last of the unused hessian_blending_* values; candidates for removal in the generator rather than by hand-editing this generated file
+                const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56;
+                const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60;
+                const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64;
+                const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71;
+                const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75;
+                const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79;
+                const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80;
+                const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80;
+                const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80;
+                const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80;
+                const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80;
+                const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80;
+                const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81;
+                const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81;
+                const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81;
+                const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81;
+                const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81;
+                const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81;
+                const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82;
+                const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82;
+                const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82;
+                const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82;
+                const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82;
+                const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82;
+                const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83;
+                const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83;
+                const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83;
+                const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83;
+                const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83;
+                const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83;
+                const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84;
+                const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84;
+                const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84;
+                const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84;
+                const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84;
+                const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b2432dfc1d0fdc704d974e2d0a00c805f8202010
--- /dev/null
+++ b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,445 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseAdvectionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_40 = tmp_qloop_39*2.0;
+                const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_42 = tmp_qloop_41*2.0;
+                const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_45 = tmp_qloop_37 + tmp_qloop_44 - 3.0;
+                const real_t tmp_qloop_46 = jac_affine_inv_0_0_GRAY*tmp_qloop_45 + jac_affine_inv_1_0_GRAY*tmp_qloop_45;
+                const real_t tmp_qloop_47 = jac_affine_inv_0_1_GRAY*tmp_qloop_45 + jac_affine_inv_1_1_GRAY*tmp_qloop_45;
+                const real_t tmp_qloop_48 = tmp_qloop_40 - _data_q_p_0[q];
+                const real_t tmp_qloop_49 = tmp_qloop_42 - _data_q_p_1[q];
+                const real_t tmp_qloop_50 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
+                const real_t tmp_qloop_51 = tmp_qloop_37 - tmp_qloop_38 + tmp_qloop_39*-4.0;
+                const real_t tmp_qloop_52 = tmp_qloop_38*ux_dof_3 + tmp_qloop_43*ux_dof_0 + tmp_qloop_48*ux_dof_1 + tmp_qloop_49*ux_dof_2 + tmp_qloop_50*ux_dof_4 + tmp_qloop_51*ux_dof_5;
+                const real_t tmp_qloop_53 = tmp_qloop_38*uy_dof_3 + tmp_qloop_43*uy_dof_0 + tmp_qloop_48*uy_dof_1 + tmp_qloop_49*uy_dof_2 + tmp_qloop_50*uy_dof_4 + tmp_qloop_51*uy_dof_5;
+                const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_56 = jac_affine_inv_0_0_GRAY*tmp_qloop_55;
+                const real_t tmp_qloop_57 = jac_affine_inv_0_1_GRAY*tmp_qloop_55;
+                const real_t tmp_qloop_58 = tmp_qloop_44 - 1.0;
+                const real_t tmp_qloop_59 = jac_affine_inv_1_0_GRAY*tmp_qloop_58;
+                const real_t tmp_qloop_60 = jac_affine_inv_1_1_GRAY*tmp_qloop_58;
+                const real_t tmp_qloop_61 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_62 = jac_affine_inv_0_0_GRAY*tmp_qloop_44;
+                const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_65 = jac_affine_inv_0_1_GRAY*tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65;
+                const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_68 = jac_affine_inv_1_0_GRAY*tmp_qloop_67 - tmp_qloop_62;
+                const real_t tmp_qloop_69 = jac_affine_inv_1_1_GRAY*tmp_qloop_67 - tmp_qloop_65;
+                const real_t tmp_qloop_70 = -tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_71 = jac_affine_inv_0_0_GRAY*tmp_qloop_70 - tmp_qloop_61;
+                const real_t tmp_qloop_72 = jac_affine_inv_0_1_GRAY*tmp_qloop_70 - tmp_qloop_64;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_43 + cp_dof_1*tmp_qloop_48 + cp_dof_2*tmp_qloop_49 + cp_dof_3*tmp_qloop_38 + cp_dof_4*tmp_qloop_50 + cp_dof_5*tmp_qloop_51)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_46 + jac_blending_inv_1_0*tmp_qloop_47) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_46 + jac_blending_inv_1_1*tmp_qloop_47));
+                const real_t q_tmp_1_1 = tmp_qloop_48*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57));
+                const real_t q_tmp_2_2 = tmp_qloop_49*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60));
+                const real_t q_tmp_3_3 = tmp_qloop_38*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66));
+                const real_t q_tmp_4_4 = tmp_qloop_50*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69));
+                const real_t q_tmp_5_5 = tmp_qloop_51*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_40 = tmp_qloop_39*2.0;
+                const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_42 = tmp_qloop_41*2.0;
+                const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_45 = tmp_qloop_37 + tmp_qloop_44 - 3.0;
+                const real_t tmp_qloop_46 = jac_affine_inv_0_0_BLUE*tmp_qloop_45 + jac_affine_inv_1_0_BLUE*tmp_qloop_45;
+                const real_t tmp_qloop_47 = jac_affine_inv_0_1_BLUE*tmp_qloop_45 + jac_affine_inv_1_1_BLUE*tmp_qloop_45;
+                const real_t tmp_qloop_48 = tmp_qloop_40 - _data_q_p_0[q];
+                const real_t tmp_qloop_49 = tmp_qloop_42 - _data_q_p_1[q];
+                const real_t tmp_qloop_50 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
+                const real_t tmp_qloop_51 = tmp_qloop_37 - tmp_qloop_38 + tmp_qloop_39*-4.0;
+                const real_t tmp_qloop_52 = tmp_qloop_38*ux_dof_3 + tmp_qloop_43*ux_dof_0 + tmp_qloop_48*ux_dof_1 + tmp_qloop_49*ux_dof_2 + tmp_qloop_50*ux_dof_4 + tmp_qloop_51*ux_dof_5;
+                const real_t tmp_qloop_53 = tmp_qloop_38*uy_dof_3 + tmp_qloop_43*uy_dof_0 + tmp_qloop_48*uy_dof_1 + tmp_qloop_49*uy_dof_2 + tmp_qloop_50*uy_dof_4 + tmp_qloop_51*uy_dof_5;
+                const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_56 = jac_affine_inv_0_0_BLUE*tmp_qloop_55;
+                const real_t tmp_qloop_57 = jac_affine_inv_0_1_BLUE*tmp_qloop_55;
+                const real_t tmp_qloop_58 = tmp_qloop_44 - 1.0;
+                const real_t tmp_qloop_59 = jac_affine_inv_1_0_BLUE*tmp_qloop_58;
+                const real_t tmp_qloop_60 = jac_affine_inv_1_1_BLUE*tmp_qloop_58;
+                const real_t tmp_qloop_61 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_62 = jac_affine_inv_0_0_BLUE*tmp_qloop_44;
+                const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_65 = jac_affine_inv_0_1_BLUE*tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65;
+                const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_68 = jac_affine_inv_1_0_BLUE*tmp_qloop_67 - tmp_qloop_62;
+                const real_t tmp_qloop_69 = jac_affine_inv_1_1_BLUE*tmp_qloop_67 - tmp_qloop_65;
+                const real_t tmp_qloop_70 = -tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_71 = jac_affine_inv_0_0_BLUE*tmp_qloop_70 - tmp_qloop_61;
+                const real_t tmp_qloop_72 = jac_affine_inv_0_1_BLUE*tmp_qloop_70 - tmp_qloop_64;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_43 + cp_dof_1*tmp_qloop_48 + cp_dof_2*tmp_qloop_49 + cp_dof_3*tmp_qloop_38 + cp_dof_4*tmp_qloop_50 + cp_dof_5*tmp_qloop_51)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_46 + jac_blending_inv_1_0*tmp_qloop_47) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_46 + jac_blending_inv_1_1*tmp_qloop_47));
+                const real_t q_tmp_1_1 = tmp_qloop_48*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57));
+                const real_t q_tmp_2_2 = tmp_qloop_49*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60));
+                const real_t q_tmp_3_3 = tmp_qloop_38*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66));
+                const real_t q_tmp_4_4 = tmp_qloop_50*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69));
+                const real_t q_tmp_5_5 = tmp_qloop_51*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..82d532646aacee8a0d5747e26cb572f8f8be1a79
--- /dev/null
+++ b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,819 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseAdvectionAnnulusMap::toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_58 = jac_affine_inv_0_0_GRAY*tmp_qloop_57;
+                const real_t tmp_qloop_59 = jac_affine_inv_0_1_GRAY*tmp_qloop_57;
+                const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_62 = jac_affine_inv_1_0_GRAY*tmp_qloop_61;
+                const real_t tmp_qloop_63 = jac_affine_inv_1_1_GRAY*tmp_qloop_61;
+                const real_t tmp_qloop_65 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_66 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_69 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_73 = jac_affine_inv_1_0_GRAY*tmp_qloop_72 - tmp_qloop_66;
+                const real_t tmp_qloop_74 = jac_affine_inv_1_1_GRAY*tmp_qloop_72 - tmp_qloop_69;
+                const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_77 = jac_affine_inv_0_0_GRAY*tmp_qloop_76 - tmp_qloop_65;
+                const real_t tmp_qloop_78 = jac_affine_inv_0_1_GRAY*tmp_qloop_76 - tmp_qloop_68;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55;
+                const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55;
+                const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55;
+                const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55;
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59);
+                const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63);
+                const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70);
+                const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74);
+                const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78);
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56;
+                const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60;
+                const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64;
+                const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71;
+                const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75;
+                const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79;
+                const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80;
+                const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80;
+                const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80;
+                const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80;
+                const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80;
+                const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80;
+                const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81;
+                const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81;
+                const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81;
+                const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81;
+                const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81;
+                const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81;
+                const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82;
+                const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82;
+                const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82;
+                const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82;
+                const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82;
+                const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82;
+                const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83;
+                const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83;
+                const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83;
+                const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83;
+                const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83;
+                const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83;
+                const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84;
+                const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84;
+                const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84;
+                const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84;
+                const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84;
+                const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 ); // 6 row indices, filled below from _data_dstVertex (entries 0-2) and _data_dstEdge (entries 3-5)
+             std::vector< uint_t > _data_colIdx( 6 ); // 6 column indices, filled below from _data_srcVertex (entries 0-2) and _data_srcEdge (entries 3-5)
+             std::vector< real_t > _data_mat( 36 ); // 6x6 local matrix stored row-major: entry 6*i + j receives elMat_i_j
+         // NOTE(review): these three vectors are constructed each time this statement run executes; if it sits in the per-element loop (header not visible here), reusing buffers may be worth profiling.
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); // column indices use the same index expressions as the row indices, read from the src arrays
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         // No transformation statements appear in this section; the elMat entries are copied into _data_mat unchanged below.
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); // hands the 6 row indices, 6 column indices and 36 values to mat
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // h = 1 / micro_edges_per_macro_edge_float; scales the macro-vertex differences below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // p_affine_const_0 = v0 + h*(v1 - v0), where v0, v1, v2 are macro_vertex_coord_id_0, _1, _2
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // p_affine_const_1 = v0 + h*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // p_affine_const_2 = v0 + h*(v1 - v0) + h*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 jac_affine: column 0 is p_affine_const_1 - p_affine_const_0, column 1 is p_affine_const_2 - p_affine_const_0
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of jac_affine
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal of the determinant; there is no guard against a zero determinant here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse of jac_affine, formed as adjugate times 1/det
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t; confirm a floating-point overload (e.g. std::abs) is the one in scope
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0; // so ctr_0 - phantom_ctr_0 evaluates to 0 wherever it is used as an index below
+             real_t _data_float_loop_ctr_array_dim_0[4]; // holds ctr_0 + 0..3 as real_t; the p_affine expressions below read only index 0
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4]; // holds ctr_1 as real_t in all four entries
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         // With h = 1/micro_edges_per_macro_edge_float and v0, v1, v2 = macro_vertex_coord_id_0, _1, _2: p_affine_0 = v0 + (ctr_0+1)*h*(v1-v0) + ctr_1*h*(v2-v0); p_affine_1 = v0 + ctr_0*h*(v1-v0) + (ctr_1+1)*h*(v2-v0); p_affine_2 = v0 + (ctr_0+1)*h*(v1-v0) + (ctr_1+1)*h*(v2-v0).
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // cp_dof_0..2 are read from _data_cpVertex, cp_dof_3..5 from _data_cpEdge
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // ux_dof_* use the same index expressions as cp_dof_*, applied to _data_uxVertex / _data_uxEdge
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // uy_dof_* likewise, applied to _data_uyVertex / _data_uyEdge
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_58 = jac_affine_inv_0_0_BLUE*tmp_qloop_57;
+                const real_t tmp_qloop_59 = jac_affine_inv_0_1_BLUE*tmp_qloop_57;
+                const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_62 = jac_affine_inv_1_0_BLUE*tmp_qloop_61;
+                const real_t tmp_qloop_63 = jac_affine_inv_1_1_BLUE*tmp_qloop_61;
+                const real_t tmp_qloop_65 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_66 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_69 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_73 = jac_affine_inv_1_0_BLUE*tmp_qloop_72 - tmp_qloop_66;
+                const real_t tmp_qloop_74 = jac_affine_inv_1_1_BLUE*tmp_qloop_72 - tmp_qloop_69;
+                const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_77 = jac_affine_inv_0_0_BLUE*tmp_qloop_76 - tmp_qloop_65;
+                const real_t tmp_qloop_78 = jac_affine_inv_0_1_BLUE*tmp_qloop_76 - tmp_qloop_68;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55;
+                const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55;
+                const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55;
+                const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55;
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59);
+                const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63);
+                const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70);
+                const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74);
+                const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78);
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56;
+                const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60;
+                const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64;
+                const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71;
+                const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75;
+                const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79;
+                const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80;
+                const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80;
+                const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80;
+                const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80;
+                const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80;
+                const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80;
+                const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81;
+                const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81;
+                const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81;
+                const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81;
+                const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81;
+                const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81;
+                const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82;
+                const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82;
+                const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82;
+                const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82;
+                const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82;
+                const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82;
+                const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83;
+                const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83;
+                const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83;
+                const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83;
+                const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83;
+                const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83;
+                const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84;
+                const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84;
+                const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84;
+                const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84;
+                const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84;
+                const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/noarch/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4e303a9e31626d1266b498a1ce2faee2367ff3b5
--- /dev/null
+++ b/operators/advection/noarch/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp
@@ -0,0 +1,534 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseAdvection::apply_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2);
+                const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_18;
+                const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_21 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_20;
+                const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_23 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25);
+                const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_27 - tmp_qloop_25);
+                const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_29 - tmp_qloop_24);
+                const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6;
+                const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9;
+                const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16;
+                const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16;
+                const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17;
+                const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19;
+                const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21;
+                const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26;
+                const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28;
+                const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30;
+                const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31;
+                const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31;
+                const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31;
+                const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31;
+                const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31;
+                const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31;
+                const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32;
+                const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32;
+                const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32;
+                const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32;
+                const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32;
+                const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32;
+                const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33;
+                const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33;
+                const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33;
+                const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33;
+                const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33;
+                const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33;
+                const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34;
+                const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34;
+                const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34;
+                const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34;
+                const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34;
+                const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34;
+                const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35;
+                const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35;
+                const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35;
+                const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35;
+                const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35;
+                const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2);
+                const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_18;
+                const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_21 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_20;
+                const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25);
+                const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_27 - tmp_qloop_25);
+                const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_29 - tmp_qloop_24);
+                const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6;
+                const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9;
+                const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16;
+                const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16;
+                const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17;
+                const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19;
+                const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21;
+                const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26;
+                const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28;
+                const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30;
+                const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31;
+                const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31;
+                const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31;
+                const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31;
+                const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31;
+                const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31;
+                const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32;
+                const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32;
+                const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32;
+                const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32;
+                const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32;
+                const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32;
+                const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33;
+                const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33;
+                const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33;
+                const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33;
+                const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33;
+                const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33;
+                const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34;
+                const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34;
+                const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34;
+                const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34;
+                const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34;
+                const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34;
+                const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35;
+                const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35;
+                const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35;
+                const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35;
+                const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35;
+                const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/noarch/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..737154b859f434fc6b415c420a64cd1014ea0ef0
--- /dev/null
+++ b/operators/advection/noarch/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp
@@ -0,0 +1,318 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+// Adds the diagonal entry of every micro-element matrix (GRAY triangles, then BLUE triangles) of one 2D macro face into _data_invDiag_Vertex / _data_invDiag_Edge. NOTE(review): no reciprocal is taken in this kernel despite its name; presumably the caller inverts afterwards - confirm.
+void P2ElementwiseAdvection::computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{ // The cp, ux and uy arrays are only read here; the invDiag arrays are read, incremented and written back.
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+       // Four-point quadrature rule: the weights above sum to 0.5; the two arrays below hold the point coordinates.
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+       // _data_q_p_0[q] and _data_q_p_1[q] are the first and second reference coordinate of quadrature point q.
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+       // GRAY geometry: macro vertex 0 plus one micro step towards macro vertex 1 and towards macro vertex 2; computed once and shared by all GRAY elements.
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // micro step as a fraction of a macro edge
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; // Jacobian columns are the two edge vectors leaving vertex 0
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; // determinant of the 2x2 Jacobian
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); // NOTE(review): no guard against a zero determinant (degenerate face)
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); // NOTE(review): unqualified abs; relies on a floating-point overload being visible through the included header - confirm
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) // ctr_1: row of micro elements
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // ctr_0: position in the row; each row is one element shorter than the previous
+          {
+             // Float copies of the loop counters. Only entry 0 is read below, because ctr_0 - phantom_ctr_0 is always 0 here.
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             // Coordinates of this element's three vertices. NOTE(review): the p_affine_* values are not read again in this loop body.
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // cp, ux, uy: dof_0..2 come from the vertex array, dof_3..5 from the edge array
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; // edge reads use offsets of 0, 1 and 2 times N*(N+1)/2 with N = micro_edges_per_macro_edge
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0; // one accumulator per diagonal entry of the 6x6 element matrix
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1) // loop over the four quadrature points
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; // interpolation weight of dof_3
+                const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; // interpolation weight of dof_0
+                const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = tmp_qloop_0 + tmp_qloop_7 - 3.0; // derivative of tmp_qloop_6 with respect to either reference coordinate
+                const real_t tmp_qloop_9 = tmp_qloop_3 - _data_q_p_0[q]; // interpolation weight of dof_1
+                const real_t tmp_qloop_10 = tmp_qloop_5 - _data_q_p_1[q]; // interpolation weight of dof_2
+                const real_t tmp_qloop_11 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; // interpolation weight of dof_4
+                const real_t tmp_qloop_12 = tmp_qloop_0 - tmp_qloop_1 + tmp_qloop_2*-4.0; // interpolation weight of dof_5
+                const real_t tmp_qloop_13 = tmp_qloop_1*ux_dof_3 + tmp_qloop_10*ux_dof_2 + tmp_qloop_11*ux_dof_4 + tmp_qloop_12*ux_dof_5 + tmp_qloop_6*ux_dof_0 + tmp_qloop_9*ux_dof_1; // ux interpolated at the quadrature point
+                const real_t tmp_qloop_14 = tmp_qloop_1*uy_dof_3 + tmp_qloop_10*uy_dof_2 + tmp_qloop_11*uy_dof_4 + tmp_qloop_12*uy_dof_5 + tmp_qloop_6*uy_dof_0 + tmp_qloop_9*uy_dof_1; // uy interpolated at the quadrature point
+                const real_t tmp_qloop_15 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_6 + cp_dof_1*tmp_qloop_9 + cp_dof_2*tmp_qloop_10 + cp_dof_3*tmp_qloop_1 + cp_dof_4*tmp_qloop_11 + cp_dof_5*tmp_qloop_12)*_data_q_w[q]; // |det J| * interpolated cp * quadrature weight
+                const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; // derivative of tmp_qloop_9 with respect to the first reference coordinate
+                const real_t tmp_qloop_17 = tmp_qloop_7 - 1.0; // derivative of tmp_qloop_10 with respect to the second reference coordinate
+                const real_t tmp_qloop_18 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_7;
+                const real_t tmp_qloop_20 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_21 = jac_affine_inv_0_1_GRAY*tmp_qloop_7;
+                const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_23 = -tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_6*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_8 + jac_affine_inv_1_0_GRAY*tmp_qloop_8) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_8 + jac_affine_inv_1_1_GRAY*tmp_qloop_8)); // q_tmp_i_i = tmp_qloop_15 * weight_i * (interpolated velocity dotted with the inverse-Jacobian-transformed derivative of weight_i)
+                const real_t q_tmp_1_1 = tmp_qloop_15*tmp_qloop_9*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16);
+                const real_t q_tmp_2_2 = tmp_qloop_10*tmp_qloop_15*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17);
+                const real_t q_tmp_3_3 = tmp_qloop_1*tmp_qloop_15*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21));
+                const real_t q_tmp_4_4 = tmp_qloop_11*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21));
+                const real_t q_tmp_5_5 = tmp_qloop_12*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // add into the output at the same indices the corresponding dof_i values were read from
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // BLUE geometry: same micro step, but the triangle's vertices are the two stepped points and their sum offset (see p_affine_const_*_BLUE)
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the BLUE Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // NOTE(review): no guard against a zero determinant here either
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): same unqualified abs as for the GRAY determinant
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) // one element fewer per row than in the GRAY loop
+          {
+             // Float copies of the loop counters, as in the GRAY loop; only entry 0 is read below.
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             // Coordinates of this BLUE element's three vertices. NOTE(review): the p_affine_* values are not read again in this loop body.
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // same dof ordering as the GRAY loop (three vertex values, then three edge values), with BLUE-specific indices
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1) // same quadrature loop as for GRAY, using the BLUE Jacobian terms
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = tmp_qloop_0 + tmp_qloop_7 - 3.0;
+                const real_t tmp_qloop_9 = tmp_qloop_3 - _data_q_p_0[q];
+                const real_t tmp_qloop_10 = tmp_qloop_5 - _data_q_p_1[q];
+                const real_t tmp_qloop_11 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7;
+                const real_t tmp_qloop_12 = tmp_qloop_0 - tmp_qloop_1 + tmp_qloop_2*-4.0;
+                const real_t tmp_qloop_13 = tmp_qloop_1*ux_dof_3 + tmp_qloop_10*ux_dof_2 + tmp_qloop_11*ux_dof_4 + tmp_qloop_12*ux_dof_5 + tmp_qloop_6*ux_dof_0 + tmp_qloop_9*ux_dof_1;
+                const real_t tmp_qloop_14 = tmp_qloop_1*uy_dof_3 + tmp_qloop_10*uy_dof_2 + tmp_qloop_11*uy_dof_4 + tmp_qloop_12*uy_dof_5 + tmp_qloop_6*uy_dof_0 + tmp_qloop_9*uy_dof_1;
+                const real_t tmp_qloop_15 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_6 + cp_dof_1*tmp_qloop_9 + cp_dof_2*tmp_qloop_10 + cp_dof_3*tmp_qloop_1 + cp_dof_4*tmp_qloop_11 + cp_dof_5*tmp_qloop_12)*_data_q_w[q];
+                const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_17 = tmp_qloop_7 - 1.0;
+                const real_t tmp_qloop_18 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_7;
+                const real_t tmp_qloop_20 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_21 = jac_affine_inv_0_1_BLUE*tmp_qloop_7;
+                const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_23 = -tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_6*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_8 + jac_affine_inv_1_0_BLUE*tmp_qloop_8) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_8 + jac_affine_inv_1_1_BLUE*tmp_qloop_8));
+                const real_t q_tmp_1_1 = tmp_qloop_15*tmp_qloop_9*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16);
+                const real_t q_tmp_2_2 = tmp_qloop_10*tmp_qloop_15*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17);
+                const real_t q_tmp_3_3 = tmp_qloop_1*tmp_qloop_15*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21));
+                const real_t q_tmp_4_4 = tmp_qloop_11*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21));
+                const real_t q_tmp_5_5 = tmp_qloop_12*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // add the BLUE contribution on top of whatever the output already holds at these indices
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/advection/noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..108b0d54ad22c8eea9c977062ed2f70efe9153c7
--- /dev/null
+++ b/operators/advection/noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp
@@ -0,0 +1,692 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseAdvection::toMatrix_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT  _data_cpEdge, real_t * RESTRICT  _data_cpVertex, idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2);
+                const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_18;
+                const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_21 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_20;
+                const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_23 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25);
+                const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_27 - tmp_qloop_25);
+                const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_29 - tmp_qloop_24);
+                const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6;
+                const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9;
+                const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16;
+                const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16;
+                const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17;
+                const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19;
+                const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21;
+                const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26;
+                const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28;
+                const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30;
+                const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31;
+                const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31;
+                const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31;
+                const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31;
+                const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31;
+                const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31;
+                const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32;
+                const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32;
+                const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32;
+                const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32;
+                const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32;
+                const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32;
+                const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33;
+                const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33;
+                const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33;
+                const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33;
+                const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33;
+                const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33;
+                const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34;
+                const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34;
+                const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34;
+                const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34;
+                const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34;
+                const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34;
+                const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35;
+                const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35;
+                const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35;
+                const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35;
+                const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35;
+                const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2);
+                const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_18;
+                const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_21 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_20;
+                const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25);
+                const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_27 - tmp_qloop_25);
+                const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_29 - tmp_qloop_24);
+                const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6;
+                const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9;
+                const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16;
+                const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16;
+                const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17;
+                const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19;
+                const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21;
+                const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26;
+                const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28;
+                const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30;
+                const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31;
+                const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31;
+                const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31;
+                const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31;
+                const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31;
+                const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31;
+                const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32;
+                const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32;
+                const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32;
+                const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32;
+                const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32;
+                const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32;
+                const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33;
+                const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33;
+                const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33;
+                const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33;
+                const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33;
+                const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33;
+                const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34;
+                const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34;
+                const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34;
+                const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34;
+                const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34;
+                const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34;
+                const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35;
+                const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35;
+                const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35;
+                const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35;
+                const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35;
+                const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/CMakeLists.txt b/operators/supg_advection/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..af2b32f7cbe94964056c3ae833647aeca9540f7c
--- /dev/null
+++ b/operators/supg_advection/CMakeLists.txt
@@ -0,0 +1,52 @@
+# Library of generated P2 SUPG-advection operators (2D kernels, with and without AnnulusMap).
+add_library(opgen-supg_advection
+   P2ElementwiseSupgAdvection.cpp
+   P2ElementwiseSupgAdvection.hpp
+   P2ElementwiseSupgAdvectionAnnulusMap.cpp
+   P2ElementwiseSupgAdvectionAnnulusMap.hpp
+)
+
+# Kernel sources: AVX builds take the apply/inverse-diagonal kernels from avx/; toMatrix always comes from noarch/.
+if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
+   target_sources(opgen-supg_advection PRIVATE
+      avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp
+      avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp
+      noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp
+   )
+
+   # Quote the flags: unquoted, an empty or multi-element value breaks the PROPERTIES name/value pairing.
+   set_source_files_properties(
+      avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp
+      avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp
+      PROPERTIES COMPILE_OPTIONS "${HYTEG_COMPILER_NATIVE_FLAGS}"
+   )
+else()
+   if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY)
+      message(WARNING "AVX vectorization only available in double precision. Using scalar kernels.")
+   endif()
+
+   target_sources(opgen-supg_advection PRIVATE
+      noarch/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp
+      noarch/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp
+      noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp
+   )
+endif()
+
+# Link PETSc only when HyTeG is configured with it.
+if(HYTEG_BUILD_WITH_PETSC)
+   target_link_libraries(opgen-supg_advection PUBLIC PETSc::PETSc)
+endif()
+# Request C++23 when half-precision support is enabled, C++17 otherwise.
+if(WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT)
+   target_compile_features(opgen-supg_advection PUBLIC cxx_std_23)
+else()
+   target_compile_features(opgen-supg_advection PUBLIC cxx_std_17)
+endif()
diff --git a/operators/supg_advection/P2ElementwiseSupgAdvection.cpp b/operators/supg_advection/P2ElementwiseSupgAdvection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..da2963e6920df65784e9eeac831e7355ad8814b8
--- /dev/null
+++ b/operators/supg_advection/P2ElementwiseSupgAdvection.cpp
@@ -0,0 +1,397 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseSupgAdvection.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+// Forwards storage and the level range to Operator and initialises cp_times_delta, ux, uy from the given P2 functions.
+P2ElementwiseSupgAdvection::P2ElementwiseSupgAdvection(
+    const std::shared_ptr< PrimitiveStorage >& storage,
+    size_t                                     minLevel,
+    size_t                                     maxLevel,
+    const P2Function< real_t >&                _cp_times_delta,
+    const P2Function< real_t >&                _ux,
+    const P2Function< real_t >&                _uy )
+: Operator( storage, minLevel, maxLevel )
+, cp_times_delta( _cp_times_delta ), ux( _ux ), uy( _uy )
+{}
+
+void P2ElementwiseSupgAdvection::apply( const P2Function< real_t >& src, // input; synced and handed to the kernel
+                                        const P2Function< real_t >& dst, // result; written through its raw data pointers
+                                        uint_t                      level, // refinement level to operate on
+                                        DoFType                     flag, // DoFs zeroed on Replace; All ^ flag goes to additive comm
+                                        UpdateType                  updateType ) const // Replace zeroes flagged dst DoFs first
+{
+   this->startTiming( "apply" );
+   // Overview: sync halos -> zero dst (Replace only) -> run the generated 2D kernel per macro-face -> additive comm.
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // storages containing macro-cells are not supported here
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // same restriction as above: no macro-cell path
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cp_times_deltaVertex =
+             face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cp_times_deltaEdge =
+             face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxEdge   = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyEdge   = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         // (vertex DoFs on the face boundary and all edge DoFs that are not inner, for every orientation).
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+         // Scalar kernel arguments: micro-edge count (as int64_t and as real_t) and the 2D coordinates of the 3 face vertices.
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_P2ElementwiseSupgAdvection_macro_2D(
+
+             _data_cp_times_deltaEdge,
+             _data_cp_times_deltaVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_uxEdge,
+             _data_uxVertex,
+             _data_uyEdge,
+             _data_uyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float );
+
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      // NOTE(review): the last argument is true only for Replace; presumably it zeroes the receiver first - confirm.
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P2ElementwiseSupgAdvection::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, // matrix proxy given to the kernel
+                                           const P2Function< idx_t >&                  src, // index function; presumably columns
+                                           const P2Function< idx_t >&                  dst, // index function; presumably rows
+                                           uint_t                                      level, // refinement level to assemble
+                                           DoFType                                     flag ) const // ignored, see below
+{
+   this->startTiming( "toMatrix" );
+   // Overview: sync the coefficient functions, then run the generated 2D toMatrix kernel on every macro-face.
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      cp_times_delta.communicate< Face, Cell >( level );
+      cp_times_delta.communicate< Edge, Cell >( level );
+      cp_times_delta.communicate< Vertex, Cell >( level );
+      ux.communicate< Face, Cell >( level );
+      ux.communicate< Edge, Cell >( level );
+      ux.communicate< Vertex, Cell >( level );
+      uy.communicate< Face, Cell >( level );
+      uy.communicate< Edge, Cell >( level );
+      uy.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+      // NOTE(review): the macro-cell path communicates the coefficients above and then aborts unconditionally.
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cp_times_deltaVertex =
+             face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cp_times_deltaEdge =
+             face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxEdge   = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyEdge   = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         // Scalar kernel arguments: micro-edge count (as int64_t and as real_t) and the 2D coordinates of the 3 face vertices.
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_P2ElementwiseSupgAdvection_macro_2D(
+
+             _data_cp_times_deltaEdge,
+             _data_cp_times_deltaVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_uxEdge,
+             _data_uxVertex,
+             _data_uyEdge,
+             _data_uyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float );
+
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+void P2ElementwiseSupgAdvection::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   // The result function is allocated on first use and reused on later calls.
+   if ( !invDiag_ )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; ++level )
+   {
+      // Reset the result on this level before anything is computed.
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         // Storage with macro-cells: the coefficient functions are communicated to the cells,
+         // then WALBERLA_ABORT is raised because no kernel is available for this case.
+         this->timingTree_->start( "pre-communication" );
+         cp_times_delta.communicate< Face, Cell >( level );
+         cp_times_delta.communicate< Edge, Cell >( level );
+         cp_times_delta.communicate< Vertex, Cell >( level );
+         ux.communicate< Face, Cell >( level );
+         ux.communicate< Edge, Cell >( level );
+         ux.communicate< Vertex, Cell >( level );
+         uy.communicate< Face, Cell >( level );
+         uy.communicate< Edge, Cell >( level );
+         uy.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." );
+         invDiag_->invertElementwise( level );
+      }
+      else
+      {
+         // Storage without macro-cells: sync the coefficient functions (direction LOW2HIGH) before reading face data.
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& faceEntry : storage_->getFaces() )
+         {
+            Face& macroFace = *faceEntry.second;
+
+            // Helpers that look up the raw face-local DoF array of a P2 function on this level.
+            const auto vertexData = [&]( const P2Function< real_t >& f ) -> real_t* {
+               return macroFace.getData( f.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            };
+            const auto edgeData = [&]( const P2Function< real_t >& f ) -> real_t* {
+               return macroFace.getData( f.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            };
+
+            real_t* invDiagVertex = vertexData( *invDiag_ );
+            real_t* invDiagEdge   = edgeData( *invDiag_ );
+            real_t* cpDeltaVertex = vertexData( cp_times_delta );
+            real_t* cpDeltaEdge   = edgeData( cp_times_delta );
+            real_t* uxVertex      = vertexData( ux );
+            real_t* uxEdge        = edgeData( ux );
+            real_t* uyVertex      = vertexData( uy );
+            real_t* uyEdge        = edgeData( uy );
+
+            // Micro-edge count of this level (as integer and as real_t) and the corner coordinates of the macro face.
+            const auto   microEdges      = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   microEdgesFloat = (real_t) levelinfo::num_microedges_per_edge( level );
+            const auto&  corners         = macroFace.getCoordinates();
+            const real_t corner0x        = (real_t) corners[0][0];
+            const real_t corner0y        = (real_t) corners[0][1];
+            const real_t corner1x        = (real_t) corners[1][0];
+            const real_t corner1y        = (real_t) corners[1][1];
+            const real_t corner2x        = (real_t) corners[2][0];
+            const real_t corner2y        = (real_t) corners[2][1];
+
+            this->timingTree_->start( "kernel" );
+            computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D(
+                cpDeltaEdge, cpDeltaVertex,
+                invDiagEdge, invDiagVertex,
+                uxEdge, uxVertex,
+                uyEdge, uyVertex,
+                corner0x, corner0y,
+                corner1x, corner1y,
+                corner2x, corner2y,
+                microEdges, microEdgesFloat );
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push the face results additively to the lower-dimensional primitives.
+         // Note: this communication could be avoided by implementing the kernel for those primitives as well.
+         this->timingTree_->start( "post-communication" );
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         invDiag_->getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+
+         // Invert the assembled values entry by entry.
+         invDiag_->invertElementwise( level );
+      }
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+std::shared_ptr< P2Function< real_t > > P2ElementwiseSupgAdvection::getInverseDiagonalValues() const
+{
+   return this->invDiag_; // shared handle to the function filled by computeInverseDiagonalOperatorValues()
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/P2ElementwiseSupgAdvection.hpp b/operators/supg_advection/P2ElementwiseSupgAdvection.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2a9d3838f1a614003a2bed318c4c0216d9239e1e
--- /dev/null
+++ b/operators/supg_advection/P2ElementwiseSupgAdvection.hpp
@@ -0,0 +1,183 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Advection operator which needs to be used in combination with SUPG.
+///
+/// Geometry map:    IdentityMap
+///
+/// Weak formulation
+///
+///     T: trial function (scalar space: Lagrange, degree: 2)
+///     s: test function  (scalar space: Lagrange, degree: 2)
+///     u: velocity function (vectorial space: Lagrange, degree: 2)
+///
+///     ∫ cp ( u · ∇T ) 𝛿(u · ∇s)
+///     Here u = (ux, uy); cp and 𝛿 presumably enter pre-multiplied as the single function cp_times_delta.
+class P2ElementwiseSupgAdvection : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                   public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   P2ElementwiseSupgAdvection( const std::shared_ptr< PrimitiveStorage >& storage,
+                               size_t                                     minLevel,
+                               size_t                                     maxLevel,
+                               const P2Function< real_t >&                _cp_times_delta,
+                               const P2Function< real_t >&                _ux,
+                               const P2Function< real_t >&                _uy );
+   /// Operator application from src to dst on one refinement level; updateType defaults to Replace.
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+   /// Matrix assembly into mat; src and dst are idx_t functions (presumably the DoF numbering of columns/rows).
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+   /// Recomputes the inverse diagonal on all levels minLevel..maxLevel, allocating the result function on first use.
+   void computeInverseDiagonalOperatorValues();
+   /// Returns the function that computeInverseDiagonalOperatorValues() fills.
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const;
+
+ protected:
+ private:
+   /// Integral: P2ElementwiseSupgAdvection
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    404     432      12       0      0              0                 0              1
+   void apply_P2ElementwiseSupgAdvection_macro_2D( real_t* RESTRICT _data_cp_times_deltaEdge,
+                                                   real_t* RESTRICT _data_cp_times_deltaVertex,
+                                                   real_t* RESTRICT _data_dstEdge,
+                                                   real_t* RESTRICT _data_dstVertex,
+                                                   real_t* RESTRICT _data_srcEdge,
+                                                   real_t* RESTRICT _data_srcVertex,
+                                                   real_t* RESTRICT _data_uxEdge,
+                                                   real_t* RESTRICT _data_uxVertex,
+                                                   real_t* RESTRICT _data_uyEdge,
+                                                   real_t* RESTRICT _data_uyVertex,
+                                                   real_t           macro_vertex_coord_id_0comp0,
+                                                   real_t           macro_vertex_coord_id_0comp1,
+                                                   real_t           macro_vertex_coord_id_1comp0,
+                                                   real_t           macro_vertex_coord_id_1comp1,
+                                                   real_t           macro_vertex_coord_id_2comp0,
+                                                   real_t           macro_vertex_coord_id_2comp1,
+                                                   int64_t          micro_edges_per_macro_edge,
+                                                   real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseSupgAdvection
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    368     396      12       0      0              0                 0              4
+   void toMatrix_P2ElementwiseSupgAdvection_macro_2D( real_t* RESTRICT                     _data_cp_times_deltaEdge,
+                                                      real_t* RESTRICT                     _data_cp_times_deltaVertex,
+                                                      idx_t* RESTRICT                      _data_dstEdge,
+                                                      idx_t* RESTRICT                      _data_dstVertex,
+                                                      idx_t* RESTRICT                      _data_srcEdge,
+                                                      idx_t* RESTRICT                      _data_srcVertex,
+                                                      real_t* RESTRICT                     _data_uxEdge,
+                                                      real_t* RESTRICT                     _data_uxVertex,
+                                                      real_t* RESTRICT                     _data_uyEdge,
+                                                      real_t* RESTRICT                     _data_uyVertex,
+                                                      real_t                               macro_vertex_coord_id_0comp0,
+                                                      real_t                               macro_vertex_coord_id_0comp1,
+                                                      real_t                               macro_vertex_coord_id_1comp0,
+                                                      real_t                               macro_vertex_coord_id_1comp1,
+                                                      real_t                               macro_vertex_coord_id_2comp0,
+                                                      real_t                               macro_vertex_coord_id_2comp1,
+                                                      std::shared_ptr< SparseMatrixProxy > mat,
+                                                      int64_t                              micro_edges_per_macro_edge,
+                                                      real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseSupgAdvection
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    254     316      12       0      0              0                 0              1
+   void computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D( real_t* RESTRICT _data_cp_times_deltaEdge,
+                                                                                  real_t* RESTRICT _data_cp_times_deltaVertex,
+                                                                                  real_t* RESTRICT _data_invDiag_Edge,
+                                                                                  real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                  real_t* RESTRICT _data_uxEdge,
+                                                                                  real_t* RESTRICT _data_uxVertex,
+                                                                                  real_t* RESTRICT _data_uyEdge,
+                                                                                  real_t* RESTRICT _data_uyVertex,
+                                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                                  real_t micro_edges_per_macro_edge_float ) const;
+
+   std::shared_ptr< P2Function< real_t > > invDiag_;       // allocated lazily by computeInverseDiagonalOperatorValues()
+   P2Function< real_t >                    cp_times_delta; // coefficient function; presumably set from _cp_times_delta
+   P2Function< real_t >                    ux;             // velocity component; presumably set from _ux
+   P2Function< real_t >                    uy;             // velocity component; presumably set from _uy
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.cpp b/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..486b863cd2fbf17de6a5d00f214a5c268ecb5bed
--- /dev/null
+++ b/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.cpp
@@ -0,0 +1,454 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseSupgAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+// Forwards storage and level range to the Operator base and initialises cp_times_delta, ux and uy from the arguments.
+P2ElementwiseSupgAdvectionAnnulusMap::P2ElementwiseSupgAdvectionAnnulusMap(
+    const std::shared_ptr< PrimitiveStorage >& storage,
+    size_t                                     minLevel,
+    size_t                                     maxLevel,
+    const P2Function< real_t >&                _cp_times_delta,
+    const P2Function< real_t >&                _ux,
+    const P2Function< real_t >&                _uy )
+: Operator( storage, minLevel, maxLevel )
+, cp_times_delta( _cp_times_delta ), ux( _ux ), uy( _uy )
+{}
+
+// Applies the operator to src on the given level: with updateType == Replace the flagged part of dst is overwritten,
+// otherwise the result is added to dst. Only storages without macro-cells are supported (aborts otherwise).
+void P2ElementwiseSupgAdvectionAnnulusMap::apply( const P2Function< real_t >& src,
+                                                  const P2Function< real_t >& dst,
+                                                  uint_t                      level,
+                                                  DoFType                     flag,
+                                                  UpdateType                  updateType ) const
+{
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cp_times_deltaVertex =
+             face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cp_times_deltaEdge =
+             face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxEdge   = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyEdge   = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Resolve the geometry map once per face; the check and all blending parameters below use this pointer.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D(
+             _data_cp_times_deltaEdge,
+             _data_cp_times_deltaVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_uxEdge,
+             _data_uxVertex,
+             _data_uyEdge,
+             _data_uyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P2ElementwiseSupgAdvectionAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                     const P2Function< idx_t >&                  src,
+                                                     const P2Function< idx_t >&                  dst,
+                                                     uint_t                                      level,
+                                                     DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      cp_times_delta.communicate< Face, Cell >( level );
+      cp_times_delta.communicate< Edge, Cell >( level );
+      cp_times_delta.communicate< Vertex, Cell >( level );
+      ux.communicate< Face, Cell >( level );
+      ux.communicate< Edge, Cell >( level );
+      ux.communicate< Vertex, Cell >( level );
+      uy.communicate< Face, Cell >( level );
+      uy.communicate< Edge, Cell >( level );
+      uy.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cp_times_deltaVertex =
+             face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_cp_times_deltaEdge =
+             face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uxEdge   = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_uyEdge   = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D(
+
+             _data_cp_times_deltaEdge,
+             _data_cp_times_deltaVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_uxEdge,
+             _data_uxVertex,
+             _data_uyEdge,
+             _data_uyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+/// Computes the inverse of the operator diagonal on every level from minLevel_ to maxLevel_ and stores it in invDiag_.
+///
+/// Per level, invDiag_ is zeroed, the per-face kernel results are added onto the lower-dimensional primitives and the
+/// assembled diagonal is inverted elementwise. invDiag_ is allocated on the first call. Only 2D storages are supported:
+/// for storages with global cells the function aborts with "Not implemented.", since no 3D kernel is called here.
+void P2ElementwiseSupgAdvectionAnnulusMap::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   // Allocate the result function lazily; later calls reuse it.
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         // There is no 3D kernel to call, so abort right away instead of first running communication whose
+         // results would never be used.
+         WALBERLA_ABORT( "Not implemented." );
+      }
+      else
+      {
+         // Sync the coefficient and velocity functions (direction LOW2HIGH) before the face kernels read them.
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the actual numerical data
+            real_t* _data_invDiag_Vertex =
+                face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_cp_times_deltaVertex =
+                face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_cp_times_deltaEdge =
+                face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uxEdge   = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_uyEdge   = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+            // Resolve the AnnulusMap once per face; its parameters below are all read through this one pointer.
+            const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+            WALBERLA_CHECK_NOT_NULLPTR(
+                annulusMap,
+                "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+            const real_t radRefVertex = annulusMap->radRefVertex();
+            const real_t radRayVertex = annulusMap->radRayVertex();
+            const real_t refVertex_0  = annulusMap->refVertex()[0];
+            const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+            const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+            const real_t refVertex_1  = annulusMap->refVertex()[1];
+            const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+            const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+            this->timingTree_->start( "kernel" );
+
+            computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D(
+
+                _data_cp_times_deltaEdge,
+                _data_cp_times_deltaVertex,
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_uxEdge,
+                _data_uxVertex,
+                _data_uyEdge,
+                _data_uyVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float,
+                radRayVertex,
+                radRefVertex,
+                rayVertex_0,
+                rayVertex_1,
+                refVertex_0,
+                refVertex_1,
+                thrVertex_0,
+                thrVertex_1 );
+
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         //
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing the apply() also for the respective
+         //       lower dimensional primitives!
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+         // All face contributions have been added up at this point, so the diagonal can be inverted.
+         ( *invDiag_ ).invertElementwise( level );
+      }
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+/// Returns the shared pointer held in invDiag_, i.e. the function that computeInverseDiagonalOperatorValues() allocates
+/// and fills. NOTE(review): it may still be null if that method has not run yet -- confirm against the constructor.
+std::shared_ptr< P2Function< real_t > > P2ElementwiseSupgAdvectionAnnulusMap::getInverseDiagonalValues() const
+{ return this->invDiag_; }
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.hpp b/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0f012333e253ef9799d976b344b3dcc8775f4737
--- /dev/null
+++ b/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.hpp
@@ -0,0 +1,209 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// advection operator which needs to be used in combination with SUPG
+///
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     T: trial function (scalar space: Lagrange, degree: 2)
+///     s: test function  (scalar space: Lagrange, degree: 2)
+///     u: velocity function (vectorial space: Lagrange, degree: 2)
+///
+///     ∫ cp ( u · ∇T ) 𝛿(u · ∇s)
+
+class P2ElementwiseSupgAdvectionAnnulusMap : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                             public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   P2ElementwiseSupgAdvectionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,         // storage holding the macro primitives
+                                         size_t                                     minLevel,        // presumably coarsest level served -- TODO confirm
+                                         size_t                                     maxLevel,        // presumably finest level served -- TODO confirm
+                                         const P2Function< real_t >&                _cp_times_delta, // coefficient; presumably cp * delta of the weak form
+                                         const P2Function< real_t >&                _ux,             // presumably x-component of the velocity u
+                                         const P2Function< real_t >&                _uy );           // presumably y-component of the velocity u
+   /// Applies the operator to src on the given level. NOTE(review): defined outside this view -- check there how flag and updateType affect dst.
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+   /// Assembles the operator into mat for 2D storages. flag is ignored (a warning is logged unless it is All); storages with global cells abort.
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+   /// Recomputes invDiag_ on all levels from minLevel_ to maxLevel_ (allocating it on first use); aborts for storages with global cells.
+   void computeInverseDiagonalOperatorValues();
+   /// Returns the shared pointer to the function that computeInverseDiagonalOperatorValues() fills.
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const;
+
+ protected: // no protected members
+ private:
+   /// Integral: P2ElementwiseSupgAdvectionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    640     852      20      12      0              0                 0              1
+   void apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t* RESTRICT _data_cp_times_deltaEdge,
+                                                             real_t* RESTRICT _data_cp_times_deltaVertex,
+                                                             real_t* RESTRICT _data_dstEdge,
+                                                             real_t* RESTRICT _data_dstVertex,
+                                                             real_t* RESTRICT _data_srcEdge,
+                                                             real_t* RESTRICT _data_srcVertex,
+                                                             real_t* RESTRICT _data_uxEdge,
+                                                             real_t* RESTRICT _data_uxVertex,
+                                                             real_t* RESTRICT _data_uyEdge,
+                                                             real_t* RESTRICT _data_uyVertex,
+                                                             real_t           macro_vertex_coord_id_0comp0,
+                                                             real_t           macro_vertex_coord_id_0comp1,
+                                                             real_t           macro_vertex_coord_id_1comp0,
+                                                             real_t           macro_vertex_coord_id_1comp1,
+                                                             real_t           macro_vertex_coord_id_2comp0,
+                                                             real_t           macro_vertex_coord_id_2comp1,
+                                                             int64_t          micro_edges_per_macro_edge,
+                                                             real_t           micro_edges_per_macro_edge_float,
+                                                             real_t           radRayVertex,
+                                                             real_t           radRefVertex,
+                                                             real_t           rayVertex_0,
+                                                             real_t           rayVertex_1,
+                                                             real_t           refVertex_0,
+                                                             real_t           refVertex_1,
+                                                             real_t           thrVertex_0,
+                                                             real_t           thrVertex_1 ) const;
+   /// Face kernel called by toMatrix() once per macro-face with that face's data pointers, corner coordinates and AnnulusMap parameters.
+   /// Integral: P2ElementwiseSupgAdvectionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    604     816      20      12      0              0                 0              4
+   void toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t* RESTRICT                     _data_cp_times_deltaEdge,
+                                                                real_t* RESTRICT                     _data_cp_times_deltaVertex,
+                                                                idx_t* RESTRICT                      _data_dstEdge,
+                                                                idx_t* RESTRICT                      _data_dstVertex,
+                                                                idx_t* RESTRICT                      _data_srcEdge,
+                                                                idx_t* RESTRICT                      _data_srcVertex,
+                                                                real_t* RESTRICT                     _data_uxEdge,
+                                                                real_t* RESTRICT                     _data_uxVertex,
+                                                                real_t* RESTRICT                     _data_uyEdge,
+                                                                real_t* RESTRICT                     _data_uyVertex,
+                                                                real_t                               macro_vertex_coord_id_0comp0,
+                                                                real_t                               macro_vertex_coord_id_0comp1,
+                                                                real_t                               macro_vertex_coord_id_1comp0,
+                                                                real_t                               macro_vertex_coord_id_1comp1,
+                                                                real_t                               macro_vertex_coord_id_2comp0,
+                                                                real_t                               macro_vertex_coord_id_2comp1,
+                                                                std::shared_ptr< SparseMatrixProxy > mat,
+                                                                int64_t                              micro_edges_per_macro_edge,
+                                                                real_t micro_edges_per_macro_edge_float,
+                                                                real_t radRayVertex,
+                                                                real_t radRefVertex,
+                                                                real_t rayVertex_0,
+                                                                real_t rayVertex_1,
+                                                                real_t refVertex_0,
+                                                                real_t refVertex_1,
+                                                                real_t thrVertex_0,
+                                                                real_t thrVertex_1 ) const;
+   /// Face kernel called by computeInverseDiagonalOperatorValues() once per macro-face and level; receives the invDiag_ face data.
+   /// Integral: P2ElementwiseSupgAdvectionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    490     736      20      12      0              0                 0              1
+   void computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D(
+       real_t* RESTRICT _data_cp_times_deltaEdge,
+       real_t* RESTRICT _data_cp_times_deltaVertex,
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_uxEdge,
+       real_t* RESTRICT _data_uxVertex,
+       real_t* RESTRICT _data_uyEdge,
+       real_t* RESTRICT _data_uyVertex,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1 ) const;
+
+   std::shared_ptr< P2Function< real_t > > invDiag_;       // inverse diagonal; allocated by computeInverseDiagonalOperatorValues()
+   P2Function< real_t >                    cp_times_delta; // coefficient function whose face data is passed to the kernels
+   P2Function< real_t >                    ux;             // presumably x-component of the velocity; face data is passed to the kernels
+   P2Function< real_t >                    uy;             // presumably y-component of the velocity; face data is passed to the kernels
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp b/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..12f74c85da1765f5e0ee8a94c21da89320ae9651
--- /dev/null
+++ b/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,1245 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvectionAnnulusMap::apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38);
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0));
+                   const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(tmp_qloop_67,tmp_qloop_68);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(tmp_qloop_70,tmp_qloop_71);
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_81 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)));
+                   const __m256d tmp_qloop_82 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41))));
+                   const __m256d tmp_qloop_59 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_57),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_58))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_57),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_58))));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_60);
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_64))));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_65);
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_69),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_69),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72))));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_73);
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_77))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_77))));
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_78);
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_81),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_82))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_81),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_82))));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_83);
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_59);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_65,tmp_qloop_85);
+                   const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_73,tmp_qloop_85);
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_85);
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_85);
+                   const __m256d tmp_qloop_90 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_65);
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_73,tmp_qloop_90);
+                   const __m256d tmp_qloop_92 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_90);
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_90);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_73);
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_94);
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_94);
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_55,tmp_qloop_78),tmp_qloop_83);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_54,tmp_qloop_54),tmp_qloop_55);
+                   const __m256d q_tmp_0_1 = tmp_qloop_61;
+                   const __m256d q_tmp_0_2 = tmp_qloop_66;
+                   const __m256d q_tmp_0_3 = tmp_qloop_74;
+                   const __m256d q_tmp_0_4 = tmp_qloop_79;
+                   const __m256d q_tmp_0_5 = tmp_qloop_84;
+                   const __m256d q_tmp_1_0 = tmp_qloop_61;
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_59,tmp_qloop_59));
+                   const __m256d q_tmp_1_2 = tmp_qloop_86;
+                   const __m256d q_tmp_1_3 = tmp_qloop_87;
+                   const __m256d q_tmp_1_4 = tmp_qloop_88;
+                   const __m256d q_tmp_1_5 = tmp_qloop_89;
+                   const __m256d q_tmp_2_0 = tmp_qloop_66;
+                   const __m256d q_tmp_2_1 = tmp_qloop_86;
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_65,tmp_qloop_65));
+                   const __m256d q_tmp_2_3 = tmp_qloop_91;
+                   const __m256d q_tmp_2_4 = tmp_qloop_92;
+                   const __m256d q_tmp_2_5 = tmp_qloop_93;
+                   const __m256d q_tmp_3_0 = tmp_qloop_74;
+                   const __m256d q_tmp_3_1 = tmp_qloop_87;
+                   const __m256d q_tmp_3_2 = tmp_qloop_91;
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_73,tmp_qloop_73));
+                   const __m256d q_tmp_3_4 = tmp_qloop_95;
+                   const __m256d q_tmp_3_5 = tmp_qloop_96;
+                   const __m256d q_tmp_4_0 = tmp_qloop_79;
+                   const __m256d q_tmp_4_1 = tmp_qloop_88;
+                   const __m256d q_tmp_4_2 = tmp_qloop_92;
+                   const __m256d q_tmp_4_3 = tmp_qloop_95;
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_78,tmp_qloop_78));
+                   const __m256d q_tmp_4_5 = tmp_qloop_97;
+                   const __m256d q_tmp_5_0 = tmp_qloop_84;
+                   const __m256d q_tmp_5_1 = tmp_qloop_89;
+                   const __m256d q_tmp_5_2 = tmp_qloop_93;
+                   const __m256d q_tmp_5_3 = tmp_qloop_96;
+                   const __m256d q_tmp_5_4 = tmp_qloop_97;
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_83,tmp_qloop_83));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                   const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                   const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                   const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                   const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                   const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0;
+                   const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56;
+                   const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*tmp_qloop_56;
+                   const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0;
+                   const real_t tmp_qloop_63 = jac_affine_inv_1_0_GRAY*tmp_qloop_62;
+                   const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_62;
+                   const real_t tmp_qloop_67 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_68 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                   const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68;
+                   const real_t tmp_qloop_70 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_71 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                   const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                   const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_76 = jac_affine_inv_1_0_GRAY*tmp_qloop_75 - tmp_qloop_68;
+                   const real_t tmp_qloop_77 = jac_affine_inv_1_1_GRAY*tmp_qloop_75 - tmp_qloop_71;
+                   const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_81 = jac_affine_inv_0_0_GRAY*tmp_qloop_80 - tmp_qloop_67;
+                   const real_t tmp_qloop_82 = jac_affine_inv_0_1_GRAY*tmp_qloop_80 - tmp_qloop_70;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21;
+                   const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                   const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58);
+                   const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55;
+                   const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60;
+                   const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64);
+                   const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65;
+                   const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72);
+                   const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73;
+                   const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77);
+                   const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78;
+                   const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82);
+                   const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83;
+                   const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59;
+                   const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85;
+                   const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85;
+                   const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85;
+                   const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85;
+                   const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65;
+                   const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90;
+                   const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90;
+                   const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90;
+                   const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73;
+                   const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94;
+                   const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94;
+                   const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55;
+                   const real_t q_tmp_0_1 = tmp_qloop_61;
+                   const real_t q_tmp_0_2 = tmp_qloop_66;
+                   const real_t q_tmp_0_3 = tmp_qloop_74;
+                   const real_t q_tmp_0_4 = tmp_qloop_79;
+                   const real_t q_tmp_0_5 = tmp_qloop_84;
+                   const real_t q_tmp_1_0 = tmp_qloop_61;
+                   const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59);
+                   const real_t q_tmp_1_2 = tmp_qloop_86;
+                   const real_t q_tmp_1_3 = tmp_qloop_87;
+                   const real_t q_tmp_1_4 = tmp_qloop_88;
+                   const real_t q_tmp_1_5 = tmp_qloop_89;
+                   const real_t q_tmp_2_0 = tmp_qloop_66;
+                   const real_t q_tmp_2_1 = tmp_qloop_86;
+                   const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65);
+                   const real_t q_tmp_2_3 = tmp_qloop_91;
+                   const real_t q_tmp_2_4 = tmp_qloop_92;
+                   const real_t q_tmp_2_5 = tmp_qloop_93;
+                   const real_t q_tmp_3_0 = tmp_qloop_74;
+                   const real_t q_tmp_3_1 = tmp_qloop_87;
+                   const real_t q_tmp_3_2 = tmp_qloop_91;
+                   const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73);
+                   const real_t q_tmp_3_4 = tmp_qloop_95;
+                   const real_t q_tmp_3_5 = tmp_qloop_96;
+                   const real_t q_tmp_4_0 = tmp_qloop_79;
+                   const real_t q_tmp_4_1 = tmp_qloop_88;
+                   const real_t q_tmp_4_2 = tmp_qloop_92;
+                   const real_t q_tmp_4_3 = tmp_qloop_95;
+                   const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78);
+                   const real_t q_tmp_4_5 = tmp_qloop_97;
+                   const real_t q_tmp_5_0 = tmp_qloop_84;
+                   const real_t q_tmp_5_1 = tmp_qloop_89;
+                   const real_t q_tmp_5_2 = tmp_qloop_93;
+                   const real_t q_tmp_5_3 = tmp_qloop_96;
+                   const real_t q_tmp_5_4 = tmp_qloop_97;
+                   const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; /* BLUE setup: derive three points from the macro vertex coordinates, then their 2x2 Jacobian, its inverse and |det|. This value is the reciprocal of micro_edges_per_macro_edge_float and scales every vertex difference below. */
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); /* component 0 of: vertex 0 + scale*(vertex 1 - vertex 0) */
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); /* component 1 of the same point */
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); /* component 0 of: scale*(vertex 2 - vertex 0) */
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); /* component 1 of the same offset */
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; /* point 0 = vertex 0 + scale*(vertex 1 - vertex 0) */
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; /* point 1 = vertex 0 + scale*(vertex 2 - vertex 0) */
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; /* point 2 = point 0 + scale*(vertex 2 - vertex 0) */
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; /* jac_affine_i_j = component i of (point j+1 - point 0) */
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; /* determinant of the 2x2 Jacobian */
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); /* reciprocal determinant; these lines contain no check for a zero determinant */
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; /* inverse via the 2x2 adjugate formula: swap the diagonal, negate the off-diagonal, divide by det */
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; /* the four inverse entries are broadcast into the FaceType.BLUE quadrature loop below */
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); /* absolute value of the determinant. NOTE(review): unqualified abs - confirm it resolves to a floating-point overload for real_t */
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38);
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0));
+                   const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(tmp_qloop_67,tmp_qloop_68);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(tmp_qloop_70,tmp_qloop_71);
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_81 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)));
+                   const __m256d tmp_qloop_82 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41))));
+                   const __m256d tmp_qloop_59 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_57),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_58))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_57),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_58))));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_60);
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_64))));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_65);
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_69),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_69),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72))));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_73);
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_77))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_77))));
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_78);
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_81),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_82))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_81),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_82))));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_83);
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_59);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_65,tmp_qloop_85);
+                   const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_73,tmp_qloop_85);
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_85);
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_85);
+                   const __m256d tmp_qloop_90 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_65);
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_73,tmp_qloop_90);
+                   const __m256d tmp_qloop_92 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_90);
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_90);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_73);
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_94);
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_94);
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_55,tmp_qloop_78),tmp_qloop_83);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_54,tmp_qloop_54),tmp_qloop_55);
+                   const __m256d q_tmp_0_1 = tmp_qloop_61;
+                   const __m256d q_tmp_0_2 = tmp_qloop_66;
+                   const __m256d q_tmp_0_3 = tmp_qloop_74;
+                   const __m256d q_tmp_0_4 = tmp_qloop_79;
+                   const __m256d q_tmp_0_5 = tmp_qloop_84;
+                   const __m256d q_tmp_1_0 = tmp_qloop_61;
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_59,tmp_qloop_59));
+                   const __m256d q_tmp_1_2 = tmp_qloop_86;
+                   const __m256d q_tmp_1_3 = tmp_qloop_87;
+                   const __m256d q_tmp_1_4 = tmp_qloop_88;
+                   const __m256d q_tmp_1_5 = tmp_qloop_89;
+                   const __m256d q_tmp_2_0 = tmp_qloop_66;
+                   const __m256d q_tmp_2_1 = tmp_qloop_86;
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_65,tmp_qloop_65));
+                   const __m256d q_tmp_2_3 = tmp_qloop_91;
+                   const __m256d q_tmp_2_4 = tmp_qloop_92;
+                   const __m256d q_tmp_2_5 = tmp_qloop_93;
+                   const __m256d q_tmp_3_0 = tmp_qloop_74;
+                   const __m256d q_tmp_3_1 = tmp_qloop_87;
+                   const __m256d q_tmp_3_2 = tmp_qloop_91;
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_73,tmp_qloop_73));
+                   const __m256d q_tmp_3_4 = tmp_qloop_95;
+                   const __m256d q_tmp_3_5 = tmp_qloop_96;
+                   const __m256d q_tmp_4_0 = tmp_qloop_79;
+                   const __m256d q_tmp_4_1 = tmp_qloop_88;
+                   const __m256d q_tmp_4_2 = tmp_qloop_92;
+                   const __m256d q_tmp_4_3 = tmp_qloop_95;
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_78,tmp_qloop_78));
+                   const __m256d q_tmp_4_5 = tmp_qloop_97;
+                   const __m256d q_tmp_5_0 = tmp_qloop_84;
+                   const __m256d q_tmp_5_1 = tmp_qloop_89;
+                   const __m256d q_tmp_5_2 = tmp_qloop_93;
+                   const __m256d q_tmp_5_3 = tmp_qloop_96;
+                   const __m256d q_tmp_5_4 = tmp_qloop_97;
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_83,tmp_qloop_83));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); // elMatVec_i = sum over j = 0..5 of q_acc_i_j * src_dof_j, on 4 packed doubles per __m256d
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); // read-modify-write: unaligned load of 4 doubles from dst, add elMatVec_0, unaligned store back to the same index; elMatVec_0..2 go to _data_dstVertex
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); // elMatVec_3..5 are accumulated into _data_dstEdge with the same load-add-store pattern
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) // scalar loop, one element per step, starting at (count / 4) * 4 with count = -ctr_1 + micro_edges_per_macro_edge - 1
+             {
+            // NOTE(review): presumably the tail of a stride-4 AVX loop whose header is outside this excerpt -- confirm. The arrays below hold ctr_0+0..ctr_0+3 and four copies of ctr_1 as real_t.
+                const int64_t phantom_ctr_0 = ctr_0; // snapshot of ctr_0, so ctr_0 - phantom_ctr_0 evaluates to 0 wherever it is used as an index
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            // The p_affine_* expressions that follow index both arrays with [ctr_0 - phantom_ctr_0], i.e. element 0 only; entries 1..3 are not read in the visible lines.
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                   const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                   const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                   const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                   const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                   const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0;
+                   const real_t tmp_qloop_57 = jac_affine_inv_0_0_BLUE*tmp_qloop_56;
+                   const real_t tmp_qloop_58 = jac_affine_inv_0_1_BLUE*tmp_qloop_56;
+                   const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0;
+                   const real_t tmp_qloop_63 = jac_affine_inv_1_0_BLUE*tmp_qloop_62;
+                   const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_62;
+                   const real_t tmp_qloop_67 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_68 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                   const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68;
+                   const real_t tmp_qloop_70 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_71 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                   const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                   const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_76 = jac_affine_inv_1_0_BLUE*tmp_qloop_75 - tmp_qloop_68;
+                   const real_t tmp_qloop_77 = jac_affine_inv_1_1_BLUE*tmp_qloop_75 - tmp_qloop_71;
+                   const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_81 = jac_affine_inv_0_0_BLUE*tmp_qloop_80 - tmp_qloop_67;
+                   const real_t tmp_qloop_82 = jac_affine_inv_0_1_BLUE*tmp_qloop_80 - tmp_qloop_70;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21;
+                   const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                   const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58);
+                   const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55;
+                   const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60;
+                   const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64);
+                   const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65;
+                   const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72);
+                   const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73;
+                   const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77);
+                   const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78;
+                   const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82);
+                   const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83;
+                   const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59;
+                   const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85;
+                   const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85;
+                   const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85;
+                   const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85;
+                   const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65;
+                   const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90;
+                   const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90;
+                   const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90;
+                   const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73;
+                   const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94;
+                   const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94;
+                   const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55;
+                   const real_t q_tmp_0_1 = tmp_qloop_61;
+                   const real_t q_tmp_0_2 = tmp_qloop_66;
+                   const real_t q_tmp_0_3 = tmp_qloop_74;
+                   const real_t q_tmp_0_4 = tmp_qloop_79;
+                   const real_t q_tmp_0_5 = tmp_qloop_84;
+                   const real_t q_tmp_1_0 = tmp_qloop_61;
+                   const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59);
+                   const real_t q_tmp_1_2 = tmp_qloop_86;
+                   const real_t q_tmp_1_3 = tmp_qloop_87;
+                   const real_t q_tmp_1_4 = tmp_qloop_88;
+                   const real_t q_tmp_1_5 = tmp_qloop_89;
+                   const real_t q_tmp_2_0 = tmp_qloop_66;
+                   const real_t q_tmp_2_1 = tmp_qloop_86;
+                   const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65);
+                   const real_t q_tmp_2_3 = tmp_qloop_91;
+                   const real_t q_tmp_2_4 = tmp_qloop_92;
+                   const real_t q_tmp_2_5 = tmp_qloop_93;
+                   const real_t q_tmp_3_0 = tmp_qloop_74;
+                   const real_t q_tmp_3_1 = tmp_qloop_87;
+                   const real_t q_tmp_3_2 = tmp_qloop_91;
+                   const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73);
+                   const real_t q_tmp_3_4 = tmp_qloop_95;
+                   const real_t q_tmp_3_5 = tmp_qloop_96;
+                   const real_t q_tmp_4_0 = tmp_qloop_79;
+                   const real_t q_tmp_4_1 = tmp_qloop_88;
+                   const real_t q_tmp_4_2 = tmp_qloop_92;
+                   const real_t q_tmp_4_3 = tmp_qloop_95;
+                   const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78);
+                   const real_t q_tmp_4_5 = tmp_qloop_97;
+                   const real_t q_tmp_5_0 = tmp_qloop_84;
+                   const real_t q_tmp_5_1 = tmp_qloop_89;
+                   const real_t q_tmp_5_2 = tmp_qloop_93;
+                   const real_t q_tmp_5_3 = tmp_qloop_96;
+                   const real_t q_tmp_5_4 = tmp_qloop_97;
+                   const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp b/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3652b354c8920b8aa252f6722b12f4bd3f0db2a6
--- /dev/null
+++ b/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,761 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvectionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38);
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0));
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62);
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(tmp_qloop_64,tmp_qloop_65);
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41))))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57))))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60))))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66))))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69))))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72))))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // scalar loop, one element per iteration, starting at the largest multiple of 4 <= (micro_edges_per_macro_edge - ctr_1); presumably the remainder of the preceding 4-wide __m256d loop
+             {
+                // Loop-counter staging arrays: only element [0] is ever read below, because phantom_ctr_0 == ctr_0 makes every index (ctr_0 - phantom_ctr_0) equal to 0.
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                // Corner points p_affine_<v>_<c> (v = corner, c = component): macro vertex 0 plus (ctr_0, ctr_1), (ctr_0+1, ctr_1), (ctr_0, ctr_1+1) times 1/micro_edges_per_macro_edge_float along (vertex1 - vertex0) and (vertex2 - vertex0).
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // six local values of cp_times_delta: _0.._2 from the Vertex array, _3.._5 from the Edge array
+                const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // six local values of ux, same index pattern as cp_times_delta
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // six local values of uy, same index pattern as cp_times_delta
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // accumulators for the six diagonal entries, summed over q below
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // 4 entries of _data_q_p_0 / _data_q_p_1 / _data_q_w (presumably quadrature points and weights)
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; // component 0 of the point: affine combination of the three corner points
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; // component 1 of the point
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // squared distance of the point from the origin
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); // 1 / sqrt(tmp_qloop_5); tmp_qloop_1, _7.._12 used below are defined outside this loop
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                   const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                   const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                   const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; // ux evaluated at this point: weighted sum of the six ux values
+                   const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; // uy evaluated at this point, same weights
+                   const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0;
+                   const real_t tmp_qloop_56 = jac_affine_inv_0_0_GRAY*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = jac_affine_inv_0_1_GRAY*tmp_qloop_55;
+                   const real_t tmp_qloop_58 = tmp_qloop_38 - 1.0;
+                   const real_t tmp_qloop_59 = jac_affine_inv_1_0_GRAY*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = jac_affine_inv_1_1_GRAY*tmp_qloop_58;
+                   const real_t tmp_qloop_61 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_62 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                   const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                   const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_65 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                   const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65;
+                   const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_68 = jac_affine_inv_1_0_GRAY*tmp_qloop_67 - tmp_qloop_62;
+                   const real_t tmp_qloop_69 = jac_affine_inv_1_1_GRAY*tmp_qloop_67 - tmp_qloop_65;
+                   const real_t tmp_qloop_70 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_71 = jac_affine_inv_0_0_GRAY*tmp_qloop_70 - tmp_qloop_61;
+                   const real_t tmp_qloop_72 = jac_affine_inv_0_1_GRAY*tmp_qloop_70 - tmp_qloop_64;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; // determinant of the 2x2 jac_blending
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); // reciprocal determinant; no guard against tmp_qloop_21 == 0
+                   const real_t abs_det_jac_blending = tmp_qloop_21; // NOTE(review): despite the name, no abs() is applied here - confirm the determinant is guaranteed positive
+                   const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; // common factor: both determinants * cp_times_delta evaluated at this point * _data_q_w[q]
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; // 2x2 inverse of jac_blending: adjugate times reciprocal determinant
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; // NOTE(review): none of the hessian_blending_* values is read anywhere in this loop body; they (and tmp_qloop_23..36, which only feed them) look like dead code
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))); // each q_tmp_i_i is tmp_qloop_54 times the square of one scalar term (the same factor is written out twice)
+                   const real_t q_tmp_1_1 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57)));
+                   const real_t q_tmp_2_2 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60)));
+                   const real_t q_tmp_3_3 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66)));
+                   const real_t q_tmp_4_4 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69)));
+                   const real_t q_tmp_5_5 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72)));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // read-modify-write: add the six contributions onto the existing _data_invDiag_Vertex / _data_invDiag_Edge entries (same indices as the loads above)
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // loop-invariant BLUE geometry: step 1 / micro_edges_per_macro_edge_float (the trailing *1.0 is a no-op)
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // vertex 0 moved one step towards vertex 1, component 0
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // same, component 1
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // one step along (vertex 2 - vertex 0), component 0
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // same, component 1
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // corner 0: one step along the first direction
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // corner 1: one step along the second direction
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // corner 2: one step along both directions
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 matrix whose columns are (corner 1 - corner 0) and (corner 2 - corner 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of jac_affine_*_BLUE
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal determinant; NOTE(review): no guard against a zero determinant (degenerate macro triangle)
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // 2x2 inverse: adjugate times reciprocal determinant
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t - confirm it resolves to a floating-point overload (e.g. via <cmath>) and not int abs(int), which would truncate
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38);
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0));
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62);
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(tmp_qloop_64,tmp_qloop_65);
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41))))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57))))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60))))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66))))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69))))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72))))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                   const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                   const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                   const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                   const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                   const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0;
+                   const real_t tmp_qloop_56 = jac_affine_inv_0_0_BLUE*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = jac_affine_inv_0_1_BLUE*tmp_qloop_55;
+                   const real_t tmp_qloop_58 = tmp_qloop_38 - 1.0;
+                   const real_t tmp_qloop_59 = jac_affine_inv_1_0_BLUE*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = jac_affine_inv_1_1_BLUE*tmp_qloop_58;
+                   const real_t tmp_qloop_61 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_62 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                   const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                   const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_65 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                   const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65;
+                   const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_68 = jac_affine_inv_1_0_BLUE*tmp_qloop_67 - tmp_qloop_62;
+                   const real_t tmp_qloop_69 = jac_affine_inv_1_1_BLUE*tmp_qloop_67 - tmp_qloop_65;
+                   const real_t tmp_qloop_70 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_71 = jac_affine_inv_0_0_BLUE*tmp_qloop_70 - tmp_qloop_61;
+                   const real_t tmp_qloop_72 = jac_affine_inv_0_1_BLUE*tmp_qloop_70 - tmp_qloop_64;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21;
+                   const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41)));
+                   const real_t q_tmp_1_1 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57)));
+                   const real_t q_tmp_2_2 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60)));
+                   const real_t q_tmp_3_3 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66)));
+                   const real_t q_tmp_4_4 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69)));
+                   const real_t q_tmp_5_5 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72)));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8a155120548926473fedd0755ac7b832dd89d364
--- /dev/null
+++ b/operators/supg_advection/avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp
@@ -0,0 +1,1000 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvection::apply_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1) // Four quadrature points; each pass adds one weighted term to every q_acc_i_j (6x6 entries, four elements per AVX register).
+                { // Shorthand used in the comments below: x = _data_q_p_0[q], y = _data_q_p_1[q] (each copied into all four lanes), inv_r_c = jac_affine_inv_r_c_GRAY.
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // 4*x
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*y
+                   const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); // 4*x + 4*y - 3
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*x*y (weight of dof 3)
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // x*x
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2*x*x
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); // 2*x*x - x (weight of dof 1)
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // y*y
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2*y*y
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); // 2*y*y - y (weight of dof 2)
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); // 4*y - 4*x*y - 4*y*y (weight of dof 4)
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); // 4*x - 4*x*y - 4*x*x (weight of dof 5)
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); // 1 - 3*x - 3*y + 4*x*y + 2*x*x + 2*y*y (weight of dof 0)
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2)); // ux: the six ux dofs combined with the weights above
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2)); // uy: the six uy dofs combined with the same weights
+                   const __m256d tmp_qloop_15 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); // g0 = ux*(tmp_qloop_2*inv_0_0 + tmp_qloop_2*inv_1_0) + uy*(tmp_qloop_2*inv_0_1 + tmp_qloop_2*inv_1_1)
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); // s = (cp_times_delta dofs combined with the same weights) * _data_q_w[q] * abs_det_jac_affine_GRAY
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); // 4*x - 1
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))); // g1 = ux*(4*x - 1)*inv_0_0 + uy*(4*x - 1)*inv_0_1
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); // g0*s, shared by row/column 0
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_19); // g0*g1*s
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); // 4*y - 1
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_21),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_21),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); // g2 = ux*(4*y - 1)*inv_1_0 + uy*(4*y - 1)*inv_1_1
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_22); // g0*g2*s
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); // 4*x*inv_1_0
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); // 4*y*inv_0_0
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); // 4*x*inv_1_1
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); // 4*y*inv_0_1
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_26,tmp_qloop_27))); // g3 = ux*(4*x*inv_1_0 + 4*y*inv_0_0) + uy*(4*x*inv_1_1 + 4*y*inv_0_1)
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_28); // g0*g3*s
+                   const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 4*x - 8*y
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); // g4 = ux*(tmp_qloop_30*inv_1_0 - 4*y*inv_0_0) + uy*(tmp_qloop_30*inv_1_1 - 4*y*inv_0_1)
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_31); // g0*g4*s
+                   const __m256d tmp_qloop_33 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 8*x - 4*y
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))); // g5 = ux*(tmp_qloop_33*inv_0_0 - 4*x*inv_1_0) + uy*(tmp_qloop_33*inv_0_1 - 4*x*inv_1_1)
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_34); // g0*g5*s
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_18); // g1*s, shared by row/column 1
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_36); // g1*g2*s
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_36); // g1*g3*s
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); // g1*g4*s
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_36); // g1*g5*s
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_22); // g2*s, shared by row/column 2
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_41); // g2*g3*s
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_41); // g2*g4*s
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_41); // g2*g5*s
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_28); // g3*s, shared by row/column 3
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); // g3*g4*s
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_45); // g3*g5*s
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,tmp_qloop_31),tmp_qloop_34); // g4*g5*s
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_15),tmp_qloop_16); // This point's contribution: q_tmp_i_j is the product g_i*g_j*s; entries (i,j) and (j,i) reuse the same temporary.
+                   const __m256d q_tmp_0_1 = tmp_qloop_20;
+                   const __m256d q_tmp_0_2 = tmp_qloop_23;
+                   const __m256d q_tmp_0_3 = tmp_qloop_29;
+                   const __m256d q_tmp_0_4 = tmp_qloop_32;
+                   const __m256d q_tmp_0_5 = tmp_qloop_35;
+                   const __m256d q_tmp_1_0 = tmp_qloop_20;
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_18,tmp_qloop_18));
+                   const __m256d q_tmp_1_2 = tmp_qloop_37;
+                   const __m256d q_tmp_1_3 = tmp_qloop_38;
+                   const __m256d q_tmp_1_4 = tmp_qloop_39;
+                   const __m256d q_tmp_1_5 = tmp_qloop_40;
+                   const __m256d q_tmp_2_0 = tmp_qloop_23;
+                   const __m256d q_tmp_2_1 = tmp_qloop_37;
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_22,tmp_qloop_22));
+                   const __m256d q_tmp_2_3 = tmp_qloop_42;
+                   const __m256d q_tmp_2_4 = tmp_qloop_43;
+                   const __m256d q_tmp_2_5 = tmp_qloop_44;
+                   const __m256d q_tmp_3_0 = tmp_qloop_29;
+                   const __m256d q_tmp_3_1 = tmp_qloop_38;
+                   const __m256d q_tmp_3_2 = tmp_qloop_42;
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_28,tmp_qloop_28));
+                   const __m256d q_tmp_3_4 = tmp_qloop_46;
+                   const __m256d q_tmp_3_5 = tmp_qloop_47;
+                   const __m256d q_tmp_4_0 = tmp_qloop_32;
+                   const __m256d q_tmp_4_1 = tmp_qloop_39;
+                   const __m256d q_tmp_4_2 = tmp_qloop_43;
+                   const __m256d q_tmp_4_3 = tmp_qloop_46;
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31));
+                   const __m256d q_tmp_4_5 = tmp_qloop_48;
+                   const __m256d q_tmp_5_0 = tmp_qloop_35;
+                   const __m256d q_tmp_5_1 = tmp_qloop_40;
+                   const __m256d q_tmp_5_2 = tmp_qloop_44;
+                   const __m256d q_tmp_5_3 = tmp_qloop_47;
+                   const __m256d q_tmp_5_4 = tmp_qloop_48;
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_34,tmp_qloop_34));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); // Add this point's 36 contributions to the running sums that were zeroed before the loop.
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // Gather the six src values of this element: dofs 0-2 from the vertex array, dofs 3-5 from the edge array.
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // same row, next entry (ctr_0 + 1)
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; // same ctr_0, row ctr_1 + 1
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; // edge array, shifted by one block of N*(N+1)/2 entries (N = micro_edges_per_macro_edge)
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; // edge array, shifted by two such blocks
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; // edge array, no block shift
+                const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // cp_times_delta: same six-index pattern as src
+                const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // ux: same six-index pattern as src
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // uy: same six-index pattern as src
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // 6x6 accumulators q_acc_i_j: zeroed here, summed over the quadrature points in the loop below.
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // 4 quadrature points: coordinates x = _data_q_p_0[q], y = _data_q_p_1[q], weight _data_q_w[q]
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; // 4*x
+                   const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; // 4*y
+                   const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; // 4*x + 4*y - 3 (both partial derivatives of tmp_qloop_12)
+                   const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; // 4*x*y
+                   const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); // x^2
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; // 2*x^2 - x
+                   const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); // y^2
+                   const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; // 2*y^2 - y
+                   const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; // 4*y - 4*x*y - 4*y^2 = 4*y*(1 - x - y)
+                   const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; // 4*x - 4*x*y - 4*x^2 = 4*x*(1 - x - y)
+                   const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; // 2*x^2 + 4*x*y + 2*y^2 - 3*x - 3*y + 1 = (1 - x - y)*(1 - 2*x - 2*y)
+                   const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; // ux dofs weighted by the six quadratic polynomials above, i.e. ux at (x, y)
+                   const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; // same weighting applied to the uy dofs
+                   const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2); // d_0: (ux, uy) combined with inverse-Jacobian-weighted derivatives of polynomial 0
+                   const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; // |det J| * (cp_times_delta at (x, y)) * quadrature weight
+                   const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0; // 4*x - 1 (x-derivative of tmp_qloop_6)
+                   const real_t tmp_qloop_18 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_17; // d_1
+                   const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19;
+                   const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0; // 4*y - 1 (y-derivative of tmp_qloop_9)
+                   const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_21; // d_2
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                   const real_t tmp_qloop_26 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                   const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27); // d_3
+                   const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28;
+                   const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; // 4 - 4*x - 8*y (y-derivative of tmp_qloop_10)
+                   const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27); // d_4
+                   const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; // 4 - 8*x - 4*y (x-derivative of tmp_qloop_11)
+                   const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_33 - tmp_qloop_26); // d_5
+                   const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34;
+                   const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18;
+                   const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                   const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36;
+                   const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36;
+                   const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22;
+                   const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41;
+                   const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41;
+                   const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28;
+                   const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45;
+                   const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45;
+                   const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34;
+                   const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16; // q_tmp_i_j = tmp_qloop_16 * d_i * d_j with d = (tmp_qloop_15, _18, _22, _28, _31, _34); symmetric in i and j
+                   const real_t q_tmp_0_1 = tmp_qloop_20;
+                   const real_t q_tmp_0_2 = tmp_qloop_23;
+                   const real_t q_tmp_0_3 = tmp_qloop_29;
+                   const real_t q_tmp_0_4 = tmp_qloop_32;
+                   const real_t q_tmp_0_5 = tmp_qloop_35;
+                   const real_t q_tmp_1_0 = tmp_qloop_20;
+                   const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18);
+                   const real_t q_tmp_1_2 = tmp_qloop_37;
+                   const real_t q_tmp_1_3 = tmp_qloop_38;
+                   const real_t q_tmp_1_4 = tmp_qloop_39;
+                   const real_t q_tmp_1_5 = tmp_qloop_40;
+                   const real_t q_tmp_2_0 = tmp_qloop_23;
+                   const real_t q_tmp_2_1 = tmp_qloop_37;
+                   const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22);
+                   const real_t q_tmp_2_3 = tmp_qloop_42;
+                   const real_t q_tmp_2_4 = tmp_qloop_43;
+                   const real_t q_tmp_2_5 = tmp_qloop_44;
+                   const real_t q_tmp_3_0 = tmp_qloop_29;
+                   const real_t q_tmp_3_1 = tmp_qloop_38;
+                   const real_t q_tmp_3_2 = tmp_qloop_42;
+                   const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28);
+                   const real_t q_tmp_3_4 = tmp_qloop_46;
+                   const real_t q_tmp_3_5 = tmp_qloop_47;
+                   const real_t q_tmp_4_0 = tmp_qloop_32;
+                   const real_t q_tmp_4_1 = tmp_qloop_39;
+                   const real_t q_tmp_4_2 = tmp_qloop_43;
+                   const real_t q_tmp_4_3 = tmp_qloop_46;
+                   const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31);
+                   const real_t q_tmp_4_5 = tmp_qloop_48;
+                   const real_t q_tmp_5_0 = tmp_qloop_35;
+                   const real_t q_tmp_5_1 = tmp_qloop_40;
+                   const real_t q_tmp_5_2 = tmp_qloop_44;
+                   const real_t q_tmp_5_3 = tmp_qloop_47;
+                   const real_t q_tmp_5_4 = tmp_qloop_48;
+                   const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // add this quadrature point's contribution to every accumulator
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; // Element matrix-vector product: elMatVec_i = sum_j q_acc_i_j * src_dof_j
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // Accumulate (read-add-write) into dst at the same vertex/edge indices the src dofs were read from.
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // h = 1 / micro_edges_per_macro_edge; the values below are computed once, outside the BLUE element loops
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // component 0 of v0 + h*(v1 - v0)
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // component 1 of v0 + h*(v1 - v0)
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // component 0 of h*(v2 - v0)
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // component 1 of h*(v2 - v0)
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // p0 = v0 + h*(v1 - v0)
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // p1 = v0 + h*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // p2 = v0 + h*(v1 - v0) + h*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // Jacobian: first column is p1 - p0, second column is p2 - p0
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal determinant; no guard against a zero determinant here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse Jacobian = adjugate / determinant
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // |det J|; NOTE(review): unqualified abs on a real_t relies on a floating-point overload being visible (e.g. via <cmath>) - confirm in the file's includes
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1) // For each of the 4 entries q of _data_q_p_0/_data_q_p_1/_data_q_w (presumably quadrature point coordinates and weights - confirm), add one contribution to the 6x6 q_acc_* accumulators. Below: p0 = _data_q_p_0[q], p1 = _data_q_p_1[q], each broadcast to all four lanes.
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // 4*p0
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*p1
+                   const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); // 4*p0 + 4*p1 - 3 (equals the p0- and p1-derivative of the polynomial in tmp_qloop_12)
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4*p0*p1, weight of dof 3
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // p0^2
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2*p0^2
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); // 2*p0^2 - p0, weight of dof 1
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // p1^2
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2*p1^2
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); // 2*p1^2 - p1, weight of dof 2
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); // 4*p1 - 4*p0*p1 - 4*p1^2, weight of dof 4
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); // 4*p0 - 4*p0*p1 - 4*p0^2, weight of dof 5
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); // 1 - 3*p0 - 3*p1 + 4*p0*p1 + 2*p0^2 + 2*p1^2, weight of dof 0
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2)); // ux at (p0,p1): sum of the six ux dofs times the weights above
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2)); // uy at (p0,p1): same weights applied to the six uy dofs
+                   const __m256d tmp_qloop_15 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); // factor f_0 (row/column 0). NOTE(review): f_0..f_5 look like (ux,uy) dotted with the inverse-Jacobian-transformed gradient of the i-th weight polynomial - confirm against the generator
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); // scalar w: cp_times_delta at (p0,p1) (same weights as ux/uy) times _data_q_w[q] times abs_det_jac_affine_BLUE
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); // 4*p0 - 1
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); // factor f_1 (row/column 1)
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); // f_0 * w, shared by all off-diagonal entries of row/column 0
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_19); // entry (0,1) = (1,0)
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); // 4*p1 - 1
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_21),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_21),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); // factor f_2 (row/column 2)
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_22); // entry (0,2) = (2,0)
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); // 4*p0 * jac_affine_inv_1_0_BLUE
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); // 4*p1 * jac_affine_inv_0_0_BLUE
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); // 4*p0 * jac_affine_inv_1_1_BLUE
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); // 4*p1 * jac_affine_inv_0_1_BLUE
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_26,tmp_qloop_27))); // factor f_3 (row/column 3)
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_28); // entry (0,3) = (3,0)
+                   const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 4*p0 - 8*p1
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); // factor f_4 (row/column 4)
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_31); // entry (0,4) = (4,0)
+                   const __m256d tmp_qloop_33 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 8*p0 - 4*p1
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))); // factor f_5 (row/column 5)
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_34); // entry (0,5) = (5,0)
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_18); // w * f_1, shared by the remaining off-diagonal entries of row/column 1
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_36); // entry (1,2) = (2,1)
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_36); // entry (1,3) = (3,1)
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); // entry (1,4) = (4,1)
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_36); // entry (1,5) = (5,1)
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_22); // w * f_2
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_41); // entry (2,3) = (3,2)
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_41); // entry (2,4) = (4,2)
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_41); // entry (2,5) = (5,2)
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_28); // w * f_3
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); // entry (3,4) = (4,3)
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_45); // entry (3,5) = (5,3)
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,tmp_qloop_31),tmp_qloop_34); // entry (4,5) = (5,4)
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_15),tmp_qloop_16); // q_tmp_i_j = w * f_i * f_j; the matrix is symmetric, so each off-diagonal product is computed once and assigned to both (i,j) and (j,i)
+                   const __m256d q_tmp_0_1 = tmp_qloop_20;
+                   const __m256d q_tmp_0_2 = tmp_qloop_23;
+                   const __m256d q_tmp_0_3 = tmp_qloop_29;
+                   const __m256d q_tmp_0_4 = tmp_qloop_32;
+                   const __m256d q_tmp_0_5 = tmp_qloop_35;
+                   const __m256d q_tmp_1_0 = tmp_qloop_20;
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_18,tmp_qloop_18));
+                   const __m256d q_tmp_1_2 = tmp_qloop_37;
+                   const __m256d q_tmp_1_3 = tmp_qloop_38;
+                   const __m256d q_tmp_1_4 = tmp_qloop_39;
+                   const __m256d q_tmp_1_5 = tmp_qloop_40;
+                   const __m256d q_tmp_2_0 = tmp_qloop_23;
+                   const __m256d q_tmp_2_1 = tmp_qloop_37;
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_22,tmp_qloop_22));
+                   const __m256d q_tmp_2_3 = tmp_qloop_42;
+                   const __m256d q_tmp_2_4 = tmp_qloop_43;
+                   const __m256d q_tmp_2_5 = tmp_qloop_44;
+                   const __m256d q_tmp_3_0 = tmp_qloop_29;
+                   const __m256d q_tmp_3_1 = tmp_qloop_38;
+                   const __m256d q_tmp_3_2 = tmp_qloop_42;
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_28,tmp_qloop_28));
+                   const __m256d q_tmp_3_4 = tmp_qloop_46;
+                   const __m256d q_tmp_3_5 = tmp_qloop_47;
+                   const __m256d q_tmp_4_0 = tmp_qloop_32;
+                   const __m256d q_tmp_4_1 = tmp_qloop_39;
+                   const __m256d q_tmp_4_2 = tmp_qloop_43;
+                   const __m256d q_tmp_4_3 = tmp_qloop_46;
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31));
+                   const __m256d q_tmp_4_5 = tmp_qloop_48;
+                   const __m256d q_tmp_5_0 = tmp_qloop_35;
+                   const __m256d q_tmp_5_1 = tmp_qloop_40;
+                   const __m256d q_tmp_5_2 = tmp_qloop_44;
+                   const __m256d q_tmp_5_3 = tmp_qloop_47;
+                   const __m256d q_tmp_5_4 = tmp_qloop_48;
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_34,tmp_qloop_34));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); // add this iteration's 36 entries to the running q_acc_* sums (declared and zeroed before the loop)
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); // elMatVec_i = sum over j of q_acc_i_j * src_dof_j, i.e. row i of the accumulated 6x6 matrix times the six src dofs (lane-wise)
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); // dst += elMatVec: unaligned load of 4 doubles, add, unaligned store back to the same address; results 0-2 go to _data_dstVertex
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); // this 4-wide window starts one element after the previous store's window, so the two overlap; the load here therefore sees the values just written by that store
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); // results 3-5 go to _data_dstEdge
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                   const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                   const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                   const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                   const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                   const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                   const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2);
+                   const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];
+                   const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0;
+                   const real_t tmp_qloop_18 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_17;
+                   const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19;
+                   const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0;
+                   const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_21;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                   const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                   const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27);
+                   const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28;
+                   const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27);
+                   const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_33 - tmp_qloop_26);
+                   const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34;
+                   const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18;
+                   const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                   const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36;
+                   const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36;
+                   const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22;
+                   const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41;
+                   const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41;
+                   const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28;
+                   const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45;
+                   const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45;
+                   const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34;
+                   const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16;
+                   const real_t q_tmp_0_1 = tmp_qloop_20;
+                   const real_t q_tmp_0_2 = tmp_qloop_23;
+                   const real_t q_tmp_0_3 = tmp_qloop_29;
+                   const real_t q_tmp_0_4 = tmp_qloop_32;
+                   const real_t q_tmp_0_5 = tmp_qloop_35;
+                   const real_t q_tmp_1_0 = tmp_qloop_20;
+                   const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18);
+                   const real_t q_tmp_1_2 = tmp_qloop_37;
+                   const real_t q_tmp_1_3 = tmp_qloop_38;
+                   const real_t q_tmp_1_4 = tmp_qloop_39;
+                   const real_t q_tmp_1_5 = tmp_qloop_40;
+                   const real_t q_tmp_2_0 = tmp_qloop_23;
+                   const real_t q_tmp_2_1 = tmp_qloop_37;
+                   const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22);
+                   const real_t q_tmp_2_3 = tmp_qloop_42;
+                   const real_t q_tmp_2_4 = tmp_qloop_43;
+                   const real_t q_tmp_2_5 = tmp_qloop_44;
+                   const real_t q_tmp_3_0 = tmp_qloop_29;
+                   const real_t q_tmp_3_1 = tmp_qloop_38;
+                   const real_t q_tmp_3_2 = tmp_qloop_42;
+                   const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28);
+                   const real_t q_tmp_3_4 = tmp_qloop_46;
+                   const real_t q_tmp_3_5 = tmp_qloop_47;
+                   const real_t q_tmp_4_0 = tmp_qloop_32;
+                   const real_t q_tmp_4_1 = tmp_qloop_39;
+                   const real_t q_tmp_4_2 = tmp_qloop_43;
+                   const real_t q_tmp_4_3 = tmp_qloop_46;
+                   const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31);
+                   const real_t q_tmp_4_5 = tmp_qloop_48;
+                   const real_t q_tmp_5_0 = tmp_qloop_35;
+                   const real_t q_tmp_5_1 = tmp_qloop_40;
+                   const real_t q_tmp_5_2 = tmp_qloop_44;
+                   const real_t q_tmp_5_3 = tmp_qloop_47;
+                   const real_t q_tmp_5_4 = tmp_qloop_48;
+                   const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..769d0c05e9bb3f1b0ffc378448fca4a708ec31e2
--- /dev/null
+++ b/operators/supg_advection/avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp
@@ -0,0 +1,516 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvection::computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1);
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8);
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2));
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0);
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21)))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // scalar remainder loop for row ctr_1: starts at the row length (micro_edges_per_macro_edge - ctr_1) divided by 4 and multiplied back by 4, then advances one element per iteration
+             {
+                // phantom_ctr_0 is a copy of ctr_0, so every "ctr_0 - phantom_ctr_0" index below is 0 and only element [0] of the two counter arrays is read in this scalar loop.
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                // p_affine_<k>_<c>: component c of three points built from the macro vertex coordinates, stepped by (ctr_0, ctr_1), (ctr_0 + 1, ctr_1) and (ctr_0, ctr_1 + 1); they are not referenced again inside this loop body.
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // for cp_times_delta, ux and uy alike: dof_0..2 are read from the *Vertex array, dof_3..5 from the *Edge array, using the same index pattern
+                const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // one accumulator per local DoF; only the six diagonal entries (i,i) are formed
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // visits the 4 point/weight entries of _data_q_p_0, _data_q_p_1 and _data_q_w (presumably the quadrature rule; the tables are defined outside this view)
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; // below, x = _data_q_p_0[q] and y = _data_q_p_1[q]
+                   const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                   const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; // 4xy
+                   const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; // 2x^2 - x
+                   const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; // 2y^2 - y
+                   const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; // 4y - 4xy - 4y^2
+                   const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; // 4x - 4xy - 4x^2
+                   const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; // expands (1 - x - y)(1 - 2x - 2y)
+                   const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; // ux at point q: the six polynomials tmp_qloop_12, 6, 9, 3, 10, 11 weight dof_0..dof_5
+                   const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; // uy at point q, same weights
+                   const real_t tmp_qloop_15 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; // cp_times_delta at point q (same weights), scaled by _data_q_w[q] and abs_det_jac_affine_GRAY
+                   const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; // 4x - 1, the x-derivative of 2x^2 - x
+                   const real_t tmp_qloop_17 = tmp_qloop_1 - 1.0; // 4y - 1, the y-derivative of 2y^2 - y
+                   const real_t tmp_qloop_18 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                   const real_t tmp_qloop_20 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_21 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                   const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; // 4 - 4x - 8y, the y-derivative of tmp_qloop_10
+                   const real_t tmp_qloop_23 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; // 4 - 8x - 4y, the x-derivative of tmp_qloop_11
+                   const real_t q_tmp_0_0 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))); // each q_tmp_i_i = tmp_qloop_15 * (tmp_qloop_13*a_i + tmp_qloop_14*b_i)^2, the square written as the same factor twice; a_i and b_i combine the x/y derivatives of weight polynomial i with jac_affine_inv_*_GRAY
+                   const real_t q_tmp_1_1 = tmp_qloop_15*((jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16)*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16));
+                   const real_t q_tmp_2_2 = tmp_qloop_15*((jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17)*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17));
+                   const real_t q_tmp_3_3 = tmp_qloop_15*((tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21)));
+                   const real_t q_tmp_4_4 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21))*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21)));
+                   const real_t q_tmp_5_5 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20))*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20)));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0; // elMatDiag_i is the finished sum over q for local DoF i
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // read-add-write into the output arrays at the same Vertex/Edge index pattern used for the DoF reads; NOTE(review): only accumulation is visible here, any inversion implied by the "invDiag" name must happen elsewhere — confirm
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // BLUE setup, computed once before the BLUE loops: reciprocal of micro_edges_per_macro_edge_float, used as the step factor h below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // component 0 of v0 + h*(v1 - v0)
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // component 1 of v0 + h*(v1 - v0)
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // component 0 of h*(v2 - v0)
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // component 1 of h*(v2 - v0)
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = v0 + h*(v1 - v0)
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = v0 + h*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = v0 + h*(v1 - v0) + h*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // jac_affine_<r>_<c>: component r of (point c+1 - point 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 matrix jac_affine_*_BLUE
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal of the determinant; no guard against a zero determinant is visible here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // 2x2 inverse: swapped diagonal and negated off-diagonal entries, each divided by the determinant
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t value — confirm a floating-point overload is in scope so the determinant is not truncated to int
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1);
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8);
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2));
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0);
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21)))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                   const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                   const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                   const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                   const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                   const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                   const real_t tmp_qloop_15 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];
+                   const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0;
+                   const real_t tmp_qloop_17 = tmp_qloop_1 - 1.0;
+                   const real_t tmp_qloop_18 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                   const real_t tmp_qloop_20 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_21 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                   const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_23 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2))*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2)));
+                   const real_t q_tmp_1_1 = tmp_qloop_15*((jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16)*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16));
+                   const real_t q_tmp_2_2 = tmp_qloop_15*((jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17)*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17));
+                   const real_t q_tmp_3_3 = tmp_qloop_15*((tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21)));
+                   const real_t q_tmp_4_4 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21))*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21)));
+                   const real_t q_tmp_5_5 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20))*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20)));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2abb673f69cb6c5adc082bf1a18d5bb8cfb5d590
--- /dev/null
+++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,687 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvectionAnnulusMap::apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56;
+                const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*tmp_qloop_56;
+                const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_63 = jac_affine_inv_1_0_GRAY*tmp_qloop_62;
+                const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_62;
+                const real_t tmp_qloop_67 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_68 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68;
+                const real_t tmp_qloop_70 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_71 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_76 = jac_affine_inv_1_0_GRAY*tmp_qloop_75 - tmp_qloop_68;
+                const real_t tmp_qloop_77 = jac_affine_inv_1_1_GRAY*tmp_qloop_75 - tmp_qloop_71;
+                const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_81 = jac_affine_inv_0_0_GRAY*tmp_qloop_80 - tmp_qloop_67;
+                const real_t tmp_qloop_82 = jac_affine_inv_0_1_GRAY*tmp_qloop_80 - tmp_qloop_70;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58);
+                const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60;
+                const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64);
+                const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65;
+                const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72);
+                const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73;
+                const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77);
+                const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78;
+                const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82);
+                const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83;
+                const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59;
+                const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85;
+                const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85;
+                const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85;
+                const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85;
+                const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65;
+                const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90;
+                const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90;
+                const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90;
+                const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73;
+                const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94;
+                const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94;
+                const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55;
+                const real_t q_tmp_0_1 = tmp_qloop_61;
+                const real_t q_tmp_0_2 = tmp_qloop_66;
+                const real_t q_tmp_0_3 = tmp_qloop_74;
+                const real_t q_tmp_0_4 = tmp_qloop_79;
+                const real_t q_tmp_0_5 = tmp_qloop_84;
+                const real_t q_tmp_1_0 = tmp_qloop_61;
+                const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59);
+                const real_t q_tmp_1_2 = tmp_qloop_86;
+                const real_t q_tmp_1_3 = tmp_qloop_87;
+                const real_t q_tmp_1_4 = tmp_qloop_88;
+                const real_t q_tmp_1_5 = tmp_qloop_89;
+                const real_t q_tmp_2_0 = tmp_qloop_66;
+                const real_t q_tmp_2_1 = tmp_qloop_86;
+                const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65);
+                const real_t q_tmp_2_3 = tmp_qloop_91;
+                const real_t q_tmp_2_4 = tmp_qloop_92;
+                const real_t q_tmp_2_5 = tmp_qloop_93;
+                const real_t q_tmp_3_0 = tmp_qloop_74;
+                const real_t q_tmp_3_1 = tmp_qloop_87;
+                const real_t q_tmp_3_2 = tmp_qloop_91;
+                const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73);
+                const real_t q_tmp_3_4 = tmp_qloop_95;
+                const real_t q_tmp_3_5 = tmp_qloop_96;
+                const real_t q_tmp_4_0 = tmp_qloop_79;
+                const real_t q_tmp_4_1 = tmp_qloop_88;
+                const real_t q_tmp_4_2 = tmp_qloop_92;
+                const real_t q_tmp_4_3 = tmp_qloop_95;
+                const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_97;
+                const real_t q_tmp_5_0 = tmp_qloop_84;
+                const real_t q_tmp_5_1 = tmp_qloop_89;
+                const real_t q_tmp_5_2 = tmp_qloop_93;
+                const real_t q_tmp_5_3 = tmp_qloop_96;
+                const real_t q_tmp_5_4 = tmp_qloop_97;
+                const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_57 = jac_affine_inv_0_0_BLUE*tmp_qloop_56;
+                const real_t tmp_qloop_58 = jac_affine_inv_0_1_BLUE*tmp_qloop_56;
+                const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_63 = jac_affine_inv_1_0_BLUE*tmp_qloop_62;
+                const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_62;
+                const real_t tmp_qloop_67 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_68 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68;
+                const real_t tmp_qloop_70 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_71 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_76 = jac_affine_inv_1_0_BLUE*tmp_qloop_75 - tmp_qloop_68;
+                const real_t tmp_qloop_77 = jac_affine_inv_1_1_BLUE*tmp_qloop_75 - tmp_qloop_71;
+                const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_81 = jac_affine_inv_0_0_BLUE*tmp_qloop_80 - tmp_qloop_67;
+                const real_t tmp_qloop_82 = jac_affine_inv_0_1_BLUE*tmp_qloop_80 - tmp_qloop_70;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58);
+                const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60;
+                const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64);
+                const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65;
+                const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72);
+                const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73;
+                const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77);
+                const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78;
+                const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82);
+                const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83;
+                const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59;
+                const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85;
+                const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85;
+                const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85;
+                const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85;
+                const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65;
+                const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90;
+                const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90;
+                const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90;
+                const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73;
+                const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94;
+                const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94;
+                const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55;
+                const real_t q_tmp_0_1 = tmp_qloop_61;
+                const real_t q_tmp_0_2 = tmp_qloop_66;
+                const real_t q_tmp_0_3 = tmp_qloop_74;
+                const real_t q_tmp_0_4 = tmp_qloop_79;
+                const real_t q_tmp_0_5 = tmp_qloop_84;
+                const real_t q_tmp_1_0 = tmp_qloop_61;
+                const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59);
+                const real_t q_tmp_1_2 = tmp_qloop_86;
+                const real_t q_tmp_1_3 = tmp_qloop_87;
+                const real_t q_tmp_1_4 = tmp_qloop_88;
+                const real_t q_tmp_1_5 = tmp_qloop_89;
+                const real_t q_tmp_2_0 = tmp_qloop_66;
+                const real_t q_tmp_2_1 = tmp_qloop_86;
+                const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65);
+                const real_t q_tmp_2_3 = tmp_qloop_91;
+                const real_t q_tmp_2_4 = tmp_qloop_92;
+                const real_t q_tmp_2_5 = tmp_qloop_93;
+                const real_t q_tmp_3_0 = tmp_qloop_74;
+                const real_t q_tmp_3_1 = tmp_qloop_87;
+                const real_t q_tmp_3_2 = tmp_qloop_91;
+                const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73);
+                const real_t q_tmp_3_4 = tmp_qloop_95;
+                const real_t q_tmp_3_5 = tmp_qloop_96;
+                const real_t q_tmp_4_0 = tmp_qloop_79;
+                const real_t q_tmp_4_1 = tmp_qloop_88;
+                const real_t q_tmp_4_2 = tmp_qloop_92;
+                const real_t q_tmp_4_3 = tmp_qloop_95;
+                const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_97;
+                const real_t q_tmp_5_0 = tmp_qloop_84;
+                const real_t q_tmp_5_1 = tmp_qloop_89;
+                const real_t q_tmp_5_2 = tmp_qloop_93;
+                const real_t q_tmp_5_3 = tmp_qloop_96;
+                const real_t q_tmp_5_4 = tmp_qloop_97;
+                const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..339901c4448e984cce6a8f9686e7742fbf606d20
--- /dev/null
+++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,445 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvectionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_56 = jac_affine_inv_0_0_GRAY*tmp_qloop_55;
+                const real_t tmp_qloop_57 = jac_affine_inv_0_1_GRAY*tmp_qloop_55;
+                const real_t tmp_qloop_58 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_59 = jac_affine_inv_1_0_GRAY*tmp_qloop_58;
+                const real_t tmp_qloop_60 = jac_affine_inv_1_1_GRAY*tmp_qloop_58;
+                const real_t tmp_qloop_61 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_62 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_65 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65;
+                const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_68 = jac_affine_inv_1_0_GRAY*tmp_qloop_67 - tmp_qloop_62;
+                const real_t tmp_qloop_69 = jac_affine_inv_1_1_GRAY*tmp_qloop_67 - tmp_qloop_65;
+                const real_t tmp_qloop_70 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_71 = jac_affine_inv_0_0_GRAY*tmp_qloop_70 - tmp_qloop_61;
+                const real_t tmp_qloop_72 = jac_affine_inv_0_1_GRAY*tmp_qloop_70 - tmp_qloop_64;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41)));
+                const real_t q_tmp_1_1 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57)));
+                const real_t q_tmp_2_2 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60)));
+                const real_t q_tmp_3_3 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66)));
+                const real_t q_tmp_4_4 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69)));
+                const real_t q_tmp_5_5 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72)));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_56 = jac_affine_inv_0_0_BLUE*tmp_qloop_55;
+                const real_t tmp_qloop_57 = jac_affine_inv_0_1_BLUE*tmp_qloop_55;
+                const real_t tmp_qloop_58 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_59 = jac_affine_inv_1_0_BLUE*tmp_qloop_58;
+                const real_t tmp_qloop_60 = jac_affine_inv_1_1_BLUE*tmp_qloop_58;
+                const real_t tmp_qloop_61 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_62 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_65 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65;
+                const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_68 = jac_affine_inv_1_0_BLUE*tmp_qloop_67 - tmp_qloop_62;
+                const real_t tmp_qloop_69 = jac_affine_inv_1_1_BLUE*tmp_qloop_67 - tmp_qloop_65;
+                const real_t tmp_qloop_70 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_71 = jac_affine_inv_0_0_BLUE*tmp_qloop_70 - tmp_qloop_61;
+                const real_t tmp_qloop_72 = jac_affine_inv_0_1_BLUE*tmp_qloop_70 - tmp_qloop_64;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41)));
+                const real_t q_tmp_1_1 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57)));
+                const real_t q_tmp_2_2 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60)));
+                const real_t q_tmp_3_3 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66)));
+                const real_t q_tmp_4_4 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69)));
+                const real_t q_tmp_5_5 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72)));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bddd6dc76b537c69b4e9b51c8fb6035eb02baee6
--- /dev/null
+++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,845 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvectionAnnulusMap::toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56;
+                const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*tmp_qloop_56;
+                const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_63 = jac_affine_inv_1_0_GRAY*tmp_qloop_62;
+                const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_62;
+                const real_t tmp_qloop_67 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_68 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68;
+                const real_t tmp_qloop_70 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_71 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_76 = jac_affine_inv_1_0_GRAY*tmp_qloop_75 - tmp_qloop_68;
+                const real_t tmp_qloop_77 = jac_affine_inv_1_1_GRAY*tmp_qloop_75 - tmp_qloop_71;
+                const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_81 = jac_affine_inv_0_0_GRAY*tmp_qloop_80 - tmp_qloop_67;
+                const real_t tmp_qloop_82 = jac_affine_inv_0_1_GRAY*tmp_qloop_80 - tmp_qloop_70;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58);
+                const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60;
+                const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64);
+                const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65;
+                const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72);
+                const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73;
+                const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77);
+                const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78;
+                const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82);
+                const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83;
+                const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59;
+                const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85;
+                const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85;
+                const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85;
+                const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85;
+                const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65;
+                const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90;
+                const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90;
+                const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90;
+                const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73;
+                const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94;
+                const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94;
+                const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55;
+                const real_t q_tmp_0_1 = tmp_qloop_61;
+                const real_t q_tmp_0_2 = tmp_qloop_66;
+                const real_t q_tmp_0_3 = tmp_qloop_74;
+                const real_t q_tmp_0_4 = tmp_qloop_79;
+                const real_t q_tmp_0_5 = tmp_qloop_84;
+                const real_t q_tmp_1_0 = tmp_qloop_61;
+                const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59);
+                const real_t q_tmp_1_2 = tmp_qloop_86;
+                const real_t q_tmp_1_3 = tmp_qloop_87;
+                const real_t q_tmp_1_4 = tmp_qloop_88;
+                const real_t q_tmp_1_5 = tmp_qloop_89;
+                const real_t q_tmp_2_0 = tmp_qloop_66;
+                const real_t q_tmp_2_1 = tmp_qloop_86;
+                const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65);
+                const real_t q_tmp_2_3 = tmp_qloop_91;
+                const real_t q_tmp_2_4 = tmp_qloop_92;
+                const real_t q_tmp_2_5 = tmp_qloop_93;
+                const real_t q_tmp_3_0 = tmp_qloop_74;
+                const real_t q_tmp_3_1 = tmp_qloop_87;
+                const real_t q_tmp_3_2 = tmp_qloop_91;
+                const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73);
+                const real_t q_tmp_3_4 = tmp_qloop_95;
+                const real_t q_tmp_3_5 = tmp_qloop_96;
+                const real_t q_tmp_4_0 = tmp_qloop_79;
+                const real_t q_tmp_4_1 = tmp_qloop_88;
+                const real_t q_tmp_4_2 = tmp_qloop_92;
+                const real_t q_tmp_4_3 = tmp_qloop_95;
+                const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_97;
+                const real_t q_tmp_5_0 = tmp_qloop_84;
+                const real_t q_tmp_5_1 = tmp_qloop_89;
+                const real_t q_tmp_5_2 = tmp_qloop_93;
+                const real_t q_tmp_5_3 = tmp_qloop_96;
+                const real_t q_tmp_5_4 = tmp_qloop_97;
+                const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // Loop-invariant geometry for the FaceType.BLUE loop below. h := 1 / micro_edges_per_macro_edge_float
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // comp0 of v0 + h*(v1 - v0), writing vN for macro_vertex_coord_id_N
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // comp1 of v0 + h*(v1 - v0)
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // comp0 of h*(v2 - v0)
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // comp1 of h*(v2 - v0)
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = v0 + h*(v1 - v0)
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = v0 + h*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = v0 + h*(v1 - v0) + h*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // jac_affine_i_j = comp i of (point (j+1) - point 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 jac_affine_*_BLUE matrix
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal of the determinant; a zero determinant is not guarded against here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse of jac_affine_*_BLUE: swap the diagonal, negate the off-diagonal, divide by the determinant
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t value; confirm it resolves to a floating-point overload rather than int abs(int)
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0;
+                const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_57 = jac_affine_inv_0_0_BLUE*tmp_qloop_56;
+                const real_t tmp_qloop_58 = jac_affine_inv_0_1_BLUE*tmp_qloop_56;
+                const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_63 = jac_affine_inv_1_0_BLUE*tmp_qloop_62;
+                const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_62;
+                const real_t tmp_qloop_67 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_68 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68;
+                const real_t tmp_qloop_70 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_71 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_76 = jac_affine_inv_1_0_BLUE*tmp_qloop_75 - tmp_qloop_68;
+                const real_t tmp_qloop_77 = jac_affine_inv_1_1_BLUE*tmp_qloop_75 - tmp_qloop_71;
+                const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_81 = jac_affine_inv_0_0_BLUE*tmp_qloop_80 - tmp_qloop_67;
+                const real_t tmp_qloop_82 = jac_affine_inv_0_1_BLUE*tmp_qloop_80 - tmp_qloop_70;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41);
+                const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58);
+                const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60;
+                const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64);
+                const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65;
+                const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72);
+                const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73;
+                const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77);
+                const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78;
+                const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82);
+                const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83;
+                const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59;
+                const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85;
+                const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85;
+                const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85;
+                const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85;
+                const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65;
+                const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90;
+                const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90;
+                const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90;
+                const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73;
+                const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94;
+                const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94;
+                const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55;
+                const real_t q_tmp_0_1 = tmp_qloop_61;
+                const real_t q_tmp_0_2 = tmp_qloop_66;
+                const real_t q_tmp_0_3 = tmp_qloop_74;
+                const real_t q_tmp_0_4 = tmp_qloop_79;
+                const real_t q_tmp_0_5 = tmp_qloop_84;
+                const real_t q_tmp_1_0 = tmp_qloop_61;
+                const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59);
+                const real_t q_tmp_1_2 = tmp_qloop_86;
+                const real_t q_tmp_1_3 = tmp_qloop_87;
+                const real_t q_tmp_1_4 = tmp_qloop_88;
+                const real_t q_tmp_1_5 = tmp_qloop_89;
+                const real_t q_tmp_2_0 = tmp_qloop_66;
+                const real_t q_tmp_2_1 = tmp_qloop_86;
+                const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65);
+                const real_t q_tmp_2_3 = tmp_qloop_91;
+                const real_t q_tmp_2_4 = tmp_qloop_92;
+                const real_t q_tmp_2_5 = tmp_qloop_93;
+                const real_t q_tmp_3_0 = tmp_qloop_74;
+                const real_t q_tmp_3_1 = tmp_qloop_87;
+                const real_t q_tmp_3_2 = tmp_qloop_91;
+                const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73);
+                const real_t q_tmp_3_4 = tmp_qloop_95;
+                const real_t q_tmp_3_5 = tmp_qloop_96;
+                const real_t q_tmp_4_0 = tmp_qloop_79;
+                const real_t q_tmp_4_1 = tmp_qloop_88;
+                const real_t q_tmp_4_2 = tmp_qloop_92;
+                const real_t q_tmp_4_3 = tmp_qloop_95;
+                const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_97;
+                const real_t q_tmp_5_0 = tmp_qloop_84;
+                const real_t q_tmp_5_1 = tmp_qloop_89;
+                const real_t q_tmp_5_2 = tmp_qloop_93;
+                const real_t q_tmp_5_3 = tmp_qloop_96;
+                const real_t q_tmp_5_4 = tmp_qloop_97;
+                const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e14daf120eec7a2467cae9dd111d2833585b6c6d
--- /dev/null
+++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp
@@ -0,0 +1,560 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvection::apply_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2);
+                const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_18 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_17;
+                const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19;
+                const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_21;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_26 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27);
+                const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31;
+                const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_33 - tmp_qloop_26);
+                const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34;
+                const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18;
+                const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36;
+                const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36;
+                const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36;
+                const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22;
+                const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41;
+                const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41;
+                const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41;
+                const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28;
+                const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45;
+                const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45;
+                const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34;
+                const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16;
+                const real_t q_tmp_0_1 = tmp_qloop_20;
+                const real_t q_tmp_0_2 = tmp_qloop_23;
+                const real_t q_tmp_0_3 = tmp_qloop_29;
+                const real_t q_tmp_0_4 = tmp_qloop_32;
+                const real_t q_tmp_0_5 = tmp_qloop_35;
+                const real_t q_tmp_1_0 = tmp_qloop_20;
+                const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18);
+                const real_t q_tmp_1_2 = tmp_qloop_37;
+                const real_t q_tmp_1_3 = tmp_qloop_38;
+                const real_t q_tmp_1_4 = tmp_qloop_39;
+                const real_t q_tmp_1_5 = tmp_qloop_40;
+                const real_t q_tmp_2_0 = tmp_qloop_23;
+                const real_t q_tmp_2_1 = tmp_qloop_37;
+                const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22);
+                const real_t q_tmp_2_3 = tmp_qloop_42;
+                const real_t q_tmp_2_4 = tmp_qloop_43;
+                const real_t q_tmp_2_5 = tmp_qloop_44;
+                const real_t q_tmp_3_0 = tmp_qloop_29;
+                const real_t q_tmp_3_1 = tmp_qloop_38;
+                const real_t q_tmp_3_2 = tmp_qloop_42;
+                const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28);
+                const real_t q_tmp_3_4 = tmp_qloop_46;
+                const real_t q_tmp_3_5 = tmp_qloop_47;
+                const real_t q_tmp_4_0 = tmp_qloop_32;
+                const real_t q_tmp_4_1 = tmp_qloop_39;
+                const real_t q_tmp_4_2 = tmp_qloop_43;
+                const real_t q_tmp_4_3 = tmp_qloop_46;
+                const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31);
+                const real_t q_tmp_4_5 = tmp_qloop_48;
+                const real_t q_tmp_5_0 = tmp_qloop_35;
+                const real_t q_tmp_5_1 = tmp_qloop_40;
+                const real_t q_tmp_5_2 = tmp_qloop_44;
+                const real_t q_tmp_5_3 = tmp_qloop_47;
+                const real_t q_tmp_5_4 = tmp_qloop_48;
+                const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2);
+                const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_18 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_17;
+                const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19;
+                const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_21;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27);
+                const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31;
+                const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_33 - tmp_qloop_26);
+                const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34;
+                const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18;
+                const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36;
+                const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36;
+                const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36;
+                const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22;
+                const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41;
+                const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41;
+                const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41;
+                const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28;
+                const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45;
+                const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45;
+                const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34;
+                const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16;
+                const real_t q_tmp_0_1 = tmp_qloop_20;
+                const real_t q_tmp_0_2 = tmp_qloop_23;
+                const real_t q_tmp_0_3 = tmp_qloop_29;
+                const real_t q_tmp_0_4 = tmp_qloop_32;
+                const real_t q_tmp_0_5 = tmp_qloop_35;
+                const real_t q_tmp_1_0 = tmp_qloop_20;
+                const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18);
+                const real_t q_tmp_1_2 = tmp_qloop_37;
+                const real_t q_tmp_1_3 = tmp_qloop_38;
+                const real_t q_tmp_1_4 = tmp_qloop_39;
+                const real_t q_tmp_1_5 = tmp_qloop_40;
+                const real_t q_tmp_2_0 = tmp_qloop_23;
+                const real_t q_tmp_2_1 = tmp_qloop_37;
+                const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22);
+                const real_t q_tmp_2_3 = tmp_qloop_42;
+                const real_t q_tmp_2_4 = tmp_qloop_43;
+                const real_t q_tmp_2_5 = tmp_qloop_44;
+                const real_t q_tmp_3_0 = tmp_qloop_29;
+                const real_t q_tmp_3_1 = tmp_qloop_38;
+                const real_t q_tmp_3_2 = tmp_qloop_42;
+                const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28);
+                const real_t q_tmp_3_4 = tmp_qloop_46;
+                const real_t q_tmp_3_5 = tmp_qloop_47;
+                const real_t q_tmp_4_0 = tmp_qloop_32;
+                const real_t q_tmp_4_1 = tmp_qloop_39;
+                const real_t q_tmp_4_2 = tmp_qloop_43;
+                const real_t q_tmp_4_3 = tmp_qloop_46;
+                const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31);
+                const real_t q_tmp_4_5 = tmp_qloop_48;
+                const real_t q_tmp_5_0 = tmp_qloop_35;
+                const real_t q_tmp_5_1 = tmp_qloop_40;
+                const real_t q_tmp_5_2 = tmp_qloop_44;
+                const real_t q_tmp_5_3 = tmp_qloop_47;
+                const real_t q_tmp_5_4 = tmp_qloop_48;
+                const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..09067a59c87202aef200b743fae247f2cf4813bb
--- /dev/null
+++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp
@@ -0,0 +1,318 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvection::computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{ // Adds, for every micro triangle of one 2D macro face, the element-local diagonal entries sum_q w_q*|det J|*cp_times_delta(q)*(u(q) . grad phi_i(q))^2 to _data_invDiag_Vertex / _data_invDiag_Edge. NOTE(review): despite the name, no reciprocal is taken in this kernel - presumably the caller inverts afterwards; confirm.
+    { // Inputs: cp_times_delta, ux and uy are each read through a vertex array and an edge array (six values per element); macro_vertex_coord_id_{0,1,2}comp{0,1} are the 2D coordinates of the three macro-triangle corners; micro_edges_per_macro_edge is used as the loop extent and its _float twin as the divisor that scales macro edge vectors down to one micro element.
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   // Four-point quadrature rule: the weights above sum to 0.5; the two arrays below hold the first and second reference coordinate of each point.
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   // _data_q_p_0[q] and _data_q_p_1[q] are referred to as x and y in the comments inside the quadrature loops.
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   // GRAY setup: corner coordinates of the micro triangle located at macro vertex 0; the Jacobian derived from it is loop-invariant and reused for every GRAY element.
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;  // h = 1 / (micro edges per macro edge)
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;  // Jacobian columns are the edge vectors p1 - p0 and p2 - p0
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;  // determinant of the 2x2 Jacobian
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);  // no guard here against a zero determinant (degenerate triangle)
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;  // inverse Jacobian = adjugate / determinant
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);  // NOTE(review): unqualified abs on real_t - relies on a floating-point overload being visible; confirm it cannot resolve to int abs(int).
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)  // ctr_1: row index
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)  // row ctr_1 holds (micro_edges_per_macro_edge - ctr_1) GRAY elements
+          {
+         // The float loop-counter arrays and p_affine_* defined below are never read again in this kernel; the constant *_GRAY Jacobian is used instead.
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         // ctr_0 - phantom_ctr_0 is always 0, so only element [0] of each array above is ever indexed.
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];  // DoFs 0-2: vertex-array entries at (ctr_0, ctr_1), (ctr_0 + 1, ctr_1), (ctr_0, ctr_1 + 1); rows are stored back to back and shrink by one entry per row
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];  // DoFs 3-5: edge-array entries; the array is addressed as three consecutive blocks of N*(N+1)/2 entries (N = micro_edges_per_macro_edge): DoF 5 in the first, DoF 3 in the second, DoF 4 in the third
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];  // ux and uy are gathered with exactly the same index pattern as cp_times_delta
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;  // one accumulator per diagonal entry (i, i) of the 6x6 element matrix
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)  // loop over the four quadrature points
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];  // 4x
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];  // 4y
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;  // 4x + 4y - 3: both reference derivatives of shape function 0
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];  // shape function 3: 4xy
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];  // shape function 1: 2x^2 - x
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];  // shape function 2: 2y^2 - y
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;  // shape function 4: 4y - 4xy - 4y^2
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;  // shape function 5: 4x - 4xy - 4x^2
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;  // shape function 0: 2x^2 + 4xy + 2y^2 - 3x - 3y + 1
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;  // ux interpolated at the quadrature point
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;  // uy interpolated at the quadrature point
+                const real_t tmp_qloop_15 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];  // |det J| * interpolated cp_times_delta * quadrature weight
+                const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0;  // 4x - 1: x-derivative of shape function 1
+                const real_t tmp_qloop_17 = tmp_qloop_1 - 1.0;  // 4y - 1: y-derivative of shape function 2
+                const real_t tmp_qloop_18 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_20 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_21 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;  // 4 - 4x - 8y: y-derivative of shape function 4
+                const real_t tmp_qloop_23 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;  // 4 - 8x - 4y: x-derivative of shape function 5
+                const real_t q_tmp_0_0 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2)));  // each q_tmp_i_i = tmp_qloop_15 * (ux*d(phi_i)/dx + uy*d(phi_i)/dy)^2, with the gradient mapped through the inverse Jacobian
+                const real_t q_tmp_1_1 = tmp_qloop_15*((jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16)*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16));
+                const real_t q_tmp_2_2 = tmp_qloop_15*((jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17)*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17));
+                const real_t q_tmp_3_3 = tmp_qloop_15*((tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21)));
+                const real_t q_tmp_4_4 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21))*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21)));
+                const real_t q_tmp_5_5 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20))*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20)));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];  // scatter-add with the same indices used for the gathers; the output is only added to, never cleared, in this kernel
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;  // BLUE setup: same h as for GRAY, but a different reference micro triangle
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;  // BLUE corner 0 = v0 + h*(v1 - v0)
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;  // BLUE corner 1 = v0 + h*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;  // BLUE corner 2 = v0 + h*(v1 - v0) + h*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;  // determinant of the BLUE Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;  // inverse Jacobian = adjugate / determinant
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);  // NOTE(review): same unqualified abs concern as for the GRAY determinant
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)  // one element fewer per row than GRAY; the last row is empty
+          {
+         // As in the GRAY loop, the float loop-counter arrays and p_affine_* below are never read again in this kernel.
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         // ctr_0 - phantom_ctr_0 is always 0, so only element [0] of each array above is ever indexed.
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];  // BLUE DoFs 0-2: vertex-array entries at (ctr_0 + 1, ctr_1), (ctr_0, ctr_1 + 1), (ctr_0 + 1, ctr_1 + 1)
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];  // BLUE DoFs 3-5: first block at row ctr_1 + 1, third block at column ctr_0 + 1, second block at (ctr_0, ctr_1)
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];  // ux and uy are gathered with exactly the same index pattern as cp_times_delta
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;  // one accumulator per diagonal entry (i, i) of the 6x6 element matrix
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)  // same quadrature loop as for GRAY, with the *_BLUE Jacobian quantities
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];  // 4x
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];  // 4y
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;  // 4x + 4y - 3: both reference derivatives of shape function 0
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];  // shape function 3: 4xy
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];  // shape function 1: 2x^2 - x
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];  // shape function 2: 2y^2 - y
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;  // shape function 4: 4y - 4xy - 4y^2
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;  // shape function 5: 4x - 4xy - 4x^2
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;  // shape function 0: 2x^2 + 4xy + 2y^2 - 3x - 3y + 1
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;  // ux interpolated at the quadrature point
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;  // uy interpolated at the quadrature point
+                const real_t tmp_qloop_15 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];  // |det J| * interpolated cp_times_delta * quadrature weight
+                const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0;  // 4x - 1: x-derivative of shape function 1
+                const real_t tmp_qloop_17 = tmp_qloop_1 - 1.0;  // 4y - 1: y-derivative of shape function 2
+                const real_t tmp_qloop_18 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_20 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_21 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;  // 4 - 4x - 8y: y-derivative of shape function 4
+                const real_t tmp_qloop_23 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;  // 4 - 8x - 4y: x-derivative of shape function 5
+                const real_t q_tmp_0_0 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2))*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2)));  // each q_tmp_i_i = tmp_qloop_15 * (ux*d(phi_i)/dx + uy*d(phi_i)/dy)^2, with the gradient mapped through the inverse Jacobian
+                const real_t q_tmp_1_1 = tmp_qloop_15*((jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16)*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16));
+                const real_t q_tmp_2_2 = tmp_qloop_15*((jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17)*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17));
+                const real_t q_tmp_3_3 = tmp_qloop_15*((tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21)));
+                const real_t q_tmp_4_4 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21))*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21)));
+                const real_t q_tmp_5_5 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20))*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20)));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];  // scatter-add with the same indices used for the BLUE gathers; these add on top of the GRAY contributions written above
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d8e7c0365d13094bae5821f58975e181f1899c67
--- /dev/null
+++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp
@@ -0,0 +1,718 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgAdvection.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgAdvection::toMatrix_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT  _data_cp_times_deltaEdge, real_t * RESTRICT  _data_cp_times_deltaVertex, idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2);
+                const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_18 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_17;
+                const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19;
+                const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_21;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_26 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27);
+                const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31;
+                const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_33 - tmp_qloop_26);
+                const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34;
+                const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18;
+                const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36;
+                const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36;
+                const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36;
+                const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22;
+                const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41;
+                const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41;
+                const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41;
+                const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28;
+                const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45;
+                const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45;
+                const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34;
+                const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16;
+                const real_t q_tmp_0_1 = tmp_qloop_20;
+                const real_t q_tmp_0_2 = tmp_qloop_23;
+                const real_t q_tmp_0_3 = tmp_qloop_29;
+                const real_t q_tmp_0_4 = tmp_qloop_32;
+                const real_t q_tmp_0_5 = tmp_qloop_35;
+                const real_t q_tmp_1_0 = tmp_qloop_20;
+                const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18);
+                const real_t q_tmp_1_2 = tmp_qloop_37;
+                const real_t q_tmp_1_3 = tmp_qloop_38;
+                const real_t q_tmp_1_4 = tmp_qloop_39;
+                const real_t q_tmp_1_5 = tmp_qloop_40;
+                const real_t q_tmp_2_0 = tmp_qloop_23;
+                const real_t q_tmp_2_1 = tmp_qloop_37;
+                const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22);
+                const real_t q_tmp_2_3 = tmp_qloop_42;
+                const real_t q_tmp_2_4 = tmp_qloop_43;
+                const real_t q_tmp_2_5 = tmp_qloop_44;
+                const real_t q_tmp_3_0 = tmp_qloop_29;
+                const real_t q_tmp_3_1 = tmp_qloop_38;
+                const real_t q_tmp_3_2 = tmp_qloop_42;
+                const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28);
+                const real_t q_tmp_3_4 = tmp_qloop_46;
+                const real_t q_tmp_3_5 = tmp_qloop_47;
+                const real_t q_tmp_4_0 = tmp_qloop_32;
+                const real_t q_tmp_4_1 = tmp_qloop_39;
+                const real_t q_tmp_4_2 = tmp_qloop_43;
+                const real_t q_tmp_4_3 = tmp_qloop_46;
+                const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31);
+                const real_t q_tmp_4_5 = tmp_qloop_48;
+                const real_t q_tmp_5_0 = tmp_qloop_35;
+                const real_t q_tmp_5_1 = tmp_qloop_40;
+                const real_t q_tmp_5_2 = tmp_qloop_44;
+                const real_t q_tmp_5_3 = tmp_qloop_47;
+                const real_t q_tmp_5_4 = tmp_qloop_48;
+                const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2;
+                const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2);
+                const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_18 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_17;
+                const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19;
+                const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_21;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27);
+                const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31;
+                const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_33 - tmp_qloop_26);
+                const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34;
+                const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18;
+                const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36;
+                const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36;
+                const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36;
+                const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22;
+                const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41;
+                const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41;
+                const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41;
+                const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28;
+                const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45;
+                const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45;
+                const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34;
+                const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16;
+                const real_t q_tmp_0_1 = tmp_qloop_20;
+                const real_t q_tmp_0_2 = tmp_qloop_23;
+                const real_t q_tmp_0_3 = tmp_qloop_29;
+                const real_t q_tmp_0_4 = tmp_qloop_32;
+                const real_t q_tmp_0_5 = tmp_qloop_35;
+                const real_t q_tmp_1_0 = tmp_qloop_20;
+                const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18);
+                const real_t q_tmp_1_2 = tmp_qloop_37;
+                const real_t q_tmp_1_3 = tmp_qloop_38;
+                const real_t q_tmp_1_4 = tmp_qloop_39;
+                const real_t q_tmp_1_5 = tmp_qloop_40;
+                const real_t q_tmp_2_0 = tmp_qloop_23;
+                const real_t q_tmp_2_1 = tmp_qloop_37;
+                const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22);
+                const real_t q_tmp_2_3 = tmp_qloop_42;
+                const real_t q_tmp_2_4 = tmp_qloop_43;
+                const real_t q_tmp_2_5 = tmp_qloop_44;
+                const real_t q_tmp_3_0 = tmp_qloop_29;
+                const real_t q_tmp_3_1 = tmp_qloop_38;
+                const real_t q_tmp_3_2 = tmp_qloop_42;
+                const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28);
+                const real_t q_tmp_3_4 = tmp_qloop_46;
+                const real_t q_tmp_3_5 = tmp_qloop_47;
+                const real_t q_tmp_4_0 = tmp_qloop_32;
+                const real_t q_tmp_4_1 = tmp_qloop_39;
+                const real_t q_tmp_4_2 = tmp_qloop_43;
+                const real_t q_tmp_4_3 = tmp_qloop_46;
+                const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31);
+                const real_t q_tmp_4_5 = tmp_qloop_48;
+                const real_t q_tmp_5_0 = tmp_qloop_35;
+                const real_t q_tmp_5_1 = tmp_qloop_40;
+                const real_t q_tmp_5_2 = tmp_qloop_44;
+                const real_t q_tmp_5_3 = tmp_qloop_47;
+                const real_t q_tmp_5_4 = tmp_qloop_48;
+                const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/CMakeLists.txt b/operators/supg_diffusion/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0196ac6f83fd8ec8652479f68d5edb9a022cfab2
--- /dev/null
+++ b/operators/supg_diffusion/CMakeLists.txt
@@ -0,0 +1,52 @@
+add_library( opgen-supg_diffusion
+   # Operator wrapper sources; the kernel translation units are added per configuration below.
+   P2ElementwiseSupgDiffusion.cpp
+   P2ElementwiseSupgDiffusion.hpp
+   P2ElementwiseSupgDiffusionAnnulusMap.cpp
+   P2ElementwiseSupgDiffusionAnnulusMap.hpp
+)
+# AVX variants are listed for apply/computeInverseDiagonalOperatorValues only; toMatrix always comes from noarch/.
+if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
+   target_sources(opgen-supg_diffusion PRIVATE
+
+      avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp
+      avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp
+      noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp
+   )
+   # Only the avx/ sources receive HYTEG_COMPILER_NATIVE_FLAGS; the noarch/ sources keep the target's default flags.
+   set_source_files_properties(
+
+      avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp
+      avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp
+      # Quoted: an empty or multi-flag (list) value must stay ONE value, otherwise the property/value pairing breaks.
+      PROPERTIES COMPILE_OPTIONS "${HYTEG_COMPILER_NATIVE_FLAGS}"
+   )
+else()
+   if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY)
+      message(WARNING "AVX vectorization only available in double precision. Using scalar kernels.")
+   endif()
+   # Scalar fallback: every kernel translation unit is taken from noarch/.
+   target_sources(opgen-supg_diffusion PRIVATE
+
+      noarch/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp
+      noarch/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp
+      noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp
+   )
+endif()
+
+if (HYTEG_BUILD_WITH_PETSC)
+   target_link_libraries(opgen-supg_diffusion PUBLIC PETSc::PETSc)
+endif ()
+if (WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT)
+    target_compile_features(opgen-supg_diffusion PUBLIC cxx_std_23)
+else ()
+    target_compile_features(opgen-supg_diffusion PUBLIC cxx_std_17)
+endif ()
diff --git a/operators/supg_diffusion/P2ElementwiseSupgDiffusion.cpp b/operators/supg_diffusion/P2ElementwiseSupgDiffusion.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3672880492641734e893f9fad9586edbf827ca38
--- /dev/null
+++ b/operators/supg_diffusion/P2ElementwiseSupgDiffusion.cpp
@@ -0,0 +1,397 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseSupgDiffusion.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseSupgDiffusion::P2ElementwiseSupgDiffusion( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                        size_t                                     minLevel,
+                                                        size_t                                     maxLevel,
+                                                        const P2Function< real_t >&                _diffusivity_times_delta,
+                                                        const P2Function< real_t >&                _wx,
+                                                        const P2Function< real_t >&                _wy )
+: Operator( storage, minLevel, maxLevel ) // storage and level range are passed on to Operator unchanged
+, diffusivity_times_delta( _diffusivity_times_delta ) // P2 function synced and read by apply() and toMatrix()
+, wx( _wx ) // P2 function passed to the kernels; presumably the first velocity component - TODO confirm
+, wy( _wy ) // P2 function passed to the kernels; presumably the second velocity component - TODO confirm
+{} // no work beyond member initialization
+
+void P2ElementwiseSupgDiffusion::apply( const P2Function< real_t >& src, // input; synced, then its face data is handed to the kernel
+                                        const P2Function< real_t >& dst, // result; zeroed here as needed, then handed to the kernel
+                                        uint_t                      level, // refinement level whose data arrays are used
+                                        DoFType                     flag, // DoF types that are zeroed when updateType == Replace
+                                        UpdateType                  updateType ) const // Replace zeroes dst first; see the Add remark below
+{
+   this->startTiming( "apply" );
+   // Make sure that the halos of src and of the stored P2 functions
+   // diffusivity_times_delta, wx and wy are up-to-date. Storages with global cells are not supported.
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_diffusivity_times_deltaVertex =
+             face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_diffusivity_times_deltaEdge =
+             face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wxEdge   = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wyEdge   = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         // (vertex DoFs for which isVertexOnBoundary() holds and edge DoFs for which isInnerEdgeDoF() does not).
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+         // Scalar kernel arguments: micro-edge count (as int64_t and as real_t) and components 0/1 of face vertices 0-2.
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_P2ElementwiseSupgDiffusion_macro_2D(
+             // Arguments are positional; keep this order in sync with the kernel declaration.
+             _data_diffusivity_times_deltaEdge,
+             _data_diffusivity_times_deltaVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_wxEdge,
+             _data_wxVertex,
+             _data_wyEdge,
+             _data_wyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float );
+
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      // NOTE(review): DoFType::All ^ flag presumably names the DoF types to exclude - confirm against communicateAdditively().
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+/// Assembles the operator on refinement level `level` into the sparse matrix proxy `mat`.
+/// One generated kernel is called per macro face; it receives the index data of `src` and `dst`,
+/// the coefficient data and the face geometry. `flag` is ignored (a warning is logged on root).
+/// Storages with global cells are not supported and abort with "Not implemented.".
+void P2ElementwiseSupgDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                           const P2Function< idx_t >&                  src,
+                                           const P2Function< idx_t >&                  dst,
+                                           uint_t                                      level,
+                                           DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // The flag is never forwarded to the kernel, so warn whenever a restriction was requested.
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( !storage_->hasGlobalCells() )
+   {
+      // Sync the coefficient functions from lower- to higher-dimensional primitives before reading face data.
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( const auto& faceEntry : storage_->getFaces() )
+      {
+         Face& macroFace = *faceEntry.second;
+
+         // Index data of the src/dst functions stored on this macro face.
+         idx_t* srcVertexIdx = macroFace.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* srcEdgeIdx   = macroFace.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* dstVertexIdx = macroFace.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* dstEdgeIdx   = macroFace.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Coefficient data (diffusivity_times_delta, wx, wy) stored on this macro face.
+         real_t* kappaDeltaVertex =
+             macroFace.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* kappaDeltaEdge =
+             macroFace.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* wxVertex = macroFace.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* wxEdge   = macroFace.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* wyVertex = macroFace.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* wyEdge   = macroFace.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Scalar kernel arguments: micro-edge count (integer and floating point) and the face corner coordinates.
+         const auto   numMicroEdges      = static_cast< int64_t >( levelinfo::num_microedges_per_edge( level ) );
+         const auto   numMicroEdgesFloat = static_cast< real_t >( levelinfo::num_microedges_per_edge( level ) );
+         const auto&  corners            = macroFace.getCoordinates();
+         const real_t corner0comp0       = static_cast< real_t >( corners[0][0] );
+         const real_t corner0comp1       = static_cast< real_t >( corners[0][1] );
+         const real_t corner1comp0       = static_cast< real_t >( corners[1][0] );
+         const real_t corner1comp1       = static_cast< real_t >( corners[1][1] );
+         const real_t corner2comp0       = static_cast< real_t >( corners[2][0] );
+         const real_t corner2comp1       = static_cast< real_t >( corners[2][1] );
+
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_P2ElementwiseSupgDiffusion_macro_2D( kappaDeltaEdge, kappaDeltaVertex,
+                                                       dstEdgeIdx, dstVertexIdx,
+                                                       srcEdgeIdx, srcVertexIdx,
+                                                       wxEdge, wxVertex,
+                                                       wyEdge, wyVertex,
+                                                       corner0comp0, corner0comp1,
+                                                       corner1comp0, corner1comp1,
+                                                       corner2comp0, corner2comp1,
+                                                       mat,
+                                                       numMicroEdges, numMicroEdgesFloat );
+
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   else
+   {
+      // Storages with global cells: the coefficients are communicated to the cells, but no kernel is available.
+      this->timingTree_->start( "pre-communication" );
+      diffusivity_times_delta.communicate< Face, Cell >( level );
+      diffusivity_times_delta.communicate< Edge, Cell >( level );
+      diffusivity_times_delta.communicate< Vertex, Cell >( level );
+      wx.communicate< Face, Cell >( level );
+      wx.communicate< Edge, Cell >( level );
+      wx.communicate< Vertex, Cell >( level );
+      wy.communicate< Face, Cell >( level );
+      wy.communicate< Edge, Cell >( level );
+      wy.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+
+   this->stopTiming( "toMatrix" );
+}
+/// (Re)computes the inverse diagonal values on all levels minLevel_..maxLevel_ and stores them in invDiag_,
+/// which is allocated on the first call. Storages with global cells abort with "Not implemented.".
+void P2ElementwiseSupgDiffusion::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         diffusivity_times_delta.communicate< Face, Cell >( level );
+         diffusivity_times_delta.communicate< Edge, Cell >( level );
+         diffusivity_times_delta.communicate< Vertex, Cell >( level );
+         wx.communicate< Face, Cell >( level );
+         wx.communicate< Edge, Cell >( level );
+         wx.communicate< Vertex, Cell >( level );
+         wy.communicate< Face, Cell >( level );
+         wy.communicate< Edge, Cell >( level );
+         wy.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." );
+         // Nothing follows the abort: control does not continue past it, so no inversion is attempted here.
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the actual numerical data
+            real_t* _data_invDiag_Vertex =
+                face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_diffusivity_times_deltaVertex =
+                face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_diffusivity_times_deltaEdge =
+                face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wxEdge   = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wyEdge   = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+            this->timingTree_->start( "kernel" );
+            computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D(
+                _data_diffusivity_times_deltaEdge,
+                _data_diffusivity_times_deltaVertex,
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_wxEdge,
+                _data_wxVertex,
+                _data_wyEdge,
+                _data_wyVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float );
+
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         // (the elementwise inversion below happens only after the face contributions have been summed).
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing the kernel also for the respective
+         //       lower dimensional primitives!
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+         ( *invDiag_ ).invertElementwise( level );
+      }
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+std::shared_ptr< P2Function< real_t > > P2ElementwiseSupgDiffusion::getInverseDiagonalValues() const
+{
+   return this->invDiag_; // shared handle; the function is allocated in computeInverseDiagonalOperatorValues()
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/P2ElementwiseSupgDiffusion.hpp b/operators/supg_diffusion/P2ElementwiseSupgDiffusion.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fd879661a7fd3317aed0dd48293b7af859dfc3a8
--- /dev/null
+++ b/operators/supg_diffusion/P2ElementwiseSupgDiffusion.hpp
@@ -0,0 +1,172 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+/// Generated elementwise operator on P2 functions (2D, IdentityMap) with coefficients diffusivity_times_delta, wx and wy.
+class P2ElementwiseSupgDiffusion : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                   public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   P2ElementwiseSupgDiffusion( const std::shared_ptr< PrimitiveStorage >& storage,
+                               size_t                                     minLevel,
+                               size_t                                     maxLevel,
+                               const P2Function< real_t >&                _diffusivity_times_delta,
+                               const P2Function< real_t >&                _wx,
+                               const P2Function< real_t >&                _wy );
+   /// Applies the operator to src on the given level and pushes the face results additively to dst's edges and vertices.
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+   /// Assembles the operator into mat from the index functions src/dst; flag is ignored, global cells abort.
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+   /// Computes the inverse diagonal values on levels minLevel_..maxLevel_ into invDiag_ (allocated on the first call).
+   void computeInverseDiagonalOperatorValues();
+   /// Returns invDiag_ as is; nothing is computed here (see computeInverseDiagonalOperatorValues()).
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const;
+
+ protected:
+ private:
+   /// Generated kernel called by apply(). Integral: P2ElementwiseSupgDiffusion
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    472     584      12       0      0              0                 0              1
+   void apply_P2ElementwiseSupgDiffusion_macro_2D( real_t* RESTRICT _data_diffusivity_times_deltaEdge,
+                                                   real_t* RESTRICT _data_diffusivity_times_deltaVertex,
+                                                   real_t* RESTRICT _data_dstEdge,
+                                                   real_t* RESTRICT _data_dstVertex,
+                                                   real_t* RESTRICT _data_srcEdge,
+                                                   real_t* RESTRICT _data_srcVertex,
+                                                   real_t* RESTRICT _data_wxEdge,
+                                                   real_t* RESTRICT _data_wxVertex,
+                                                   real_t* RESTRICT _data_wyEdge,
+                                                   real_t* RESTRICT _data_wyVertex,
+                                                   real_t           macro_vertex_coord_id_0comp0,
+                                                   real_t           macro_vertex_coord_id_0comp1,
+                                                   real_t           macro_vertex_coord_id_1comp0,
+                                                   real_t           macro_vertex_coord_id_1comp1,
+                                                   real_t           macro_vertex_coord_id_2comp0,
+                                                   real_t           macro_vertex_coord_id_2comp1,
+                                                   int64_t          micro_edges_per_macro_edge,
+                                                   real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Generated kernel called by toMatrix() once per macro face. Integral: P2ElementwiseSupgDiffusion
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    436     548      12       0      0              0                 0              4
+   void toMatrix_P2ElementwiseSupgDiffusion_macro_2D( real_t* RESTRICT                     _data_diffusivity_times_deltaEdge,
+                                                      real_t* RESTRICT                     _data_diffusivity_times_deltaVertex,
+                                                      idx_t* RESTRICT                      _data_dstEdge,
+                                                      idx_t* RESTRICT                      _data_dstVertex,
+                                                      idx_t* RESTRICT                      _data_srcEdge,
+                                                      idx_t* RESTRICT                      _data_srcVertex,
+                                                      real_t* RESTRICT                     _data_wxEdge,
+                                                      real_t* RESTRICT                     _data_wxVertex,
+                                                      real_t* RESTRICT                     _data_wyEdge,
+                                                      real_t* RESTRICT                     _data_wyVertex,
+                                                      real_t                               macro_vertex_coord_id_0comp0,
+                                                      real_t                               macro_vertex_coord_id_0comp1,
+                                                      real_t                               macro_vertex_coord_id_1comp0,
+                                                      real_t                               macro_vertex_coord_id_1comp1,
+                                                      real_t                               macro_vertex_coord_id_2comp0,
+                                                      real_t                               macro_vertex_coord_id_2comp1,
+                                                      std::shared_ptr< SparseMatrixProxy > mat,
+                                                      int64_t                              micro_edges_per_macro_edge,
+                                                      real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Generated kernel called by computeInverseDiagonalOperatorValues() once per macro face and level.
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    322     428      12       0      0              0                 0              1
+   void computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D(
+       real_t* RESTRICT _data_diffusivity_times_deltaEdge,
+       real_t* RESTRICT _data_diffusivity_times_deltaVertex,
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_wxEdge,
+       real_t* RESTRICT _data_wxVertex,
+       real_t* RESTRICT _data_wyEdge,
+       real_t* RESTRICT _data_wyVertex,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float ) const;
+
+   std::shared_ptr< P2Function< real_t > > invDiag_;                ///< allocated lazily in computeInverseDiagonalOperatorValues()
+   P2Function< real_t >                    diffusivity_times_delta; ///< coefficient whose face data is passed to the kernels
+   P2Function< real_t >                    wx;                      ///< coefficient whose face data is passed to the kernels
+   P2Function< real_t >                    wy;                      ///< coefficient whose face data is passed to the kernels
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.cpp b/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..da784554b119666cbeaea7787448492c560e15e1
--- /dev/null
+++ b/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.cpp
@@ -0,0 +1,454 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (possibly
+// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) that causes a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and pull that header in indirectly through
+// the public header <cmath> included below.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseSupgDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+/// Forwards storage and level range to Operator and initialises diffusivity_times_delta, wx and wy from the arguments.
+P2ElementwiseSupgDiffusionAnnulusMap::P2ElementwiseSupgDiffusionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                            size_t                                     minLevel,
+                                                                            size_t                                     maxLevel,
+                                                                            const P2Function< real_t >& _diffusivity_times_delta,
+                                                                            const P2Function< real_t >& _wx,
+                                                                            const P2Function< real_t >& _wy )
+: Operator( storage, minLevel, maxLevel )
+, diffusivity_times_delta( _diffusivity_times_delta )
+, wx( _wx )
+, wy( _wy )
+{}
+
+/// Applies the operator to src on the given level and writes the result to dst. If updateType is Replace,
+/// dst is zeroed first on the DoFs selected by flag. Storages with global cells abort with "Not implemented.".
+void P2ElementwiseSupgDiffusionAnnulusMap::apply( const P2Function< real_t >& src,
+                                                  const P2Function< real_t >& dst,
+                                                  uint_t                      level,
+                                                  DoFType                     flag,
+                                                  UpdateType                  updateType ) const
+{
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_diffusivity_times_deltaVertex =
+             face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_diffusivity_times_deltaEdge =
+             face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wxEdge   = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wyEdge   = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ); // cast once, reused below
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D(
+             _data_diffusivity_times_deltaEdge,
+             _data_diffusivity_times_deltaVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_wxEdge,
+             _data_wxVertex,
+             _data_wyEdge,
+             _data_wyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+/// Assembles the operator into the sparse matrix proxy \p mat on refinement level \p level.
+///
+/// \param mat    sparse matrix proxy that is handed through to the generated toMatrix kernel
+/// \param src    enumeration function whose per-face index data the kernel receives as "src"
+/// \param dst    enumeration function whose per-face index data the kernel receives as "dst"
+/// \param level  refinement level to assemble on
+/// \param flag   not honoured: a warning is logged on root whenever it differs from All
+///
+/// Storages with global cells are not supported; that path aborts with "Not implemented.".
+void P2ElementwiseSupgDiffusionAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                     const P2Function< idx_t >&                  src,
+                                                     const P2Function< idx_t >&                  dst,
+                                                     uint_t                                      level,
+                                                     DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      diffusivity_times_delta.communicate< Face, Cell >( level );
+      diffusivity_times_delta.communicate< Edge, Cell >( level );
+      diffusivity_times_delta.communicate< Vertex, Cell >( level );
+      wx.communicate< Face, Cell >( level );
+      wx.communicate< Edge, Cell >( level );
+      wx.communicate< Vertex, Cell >( level );
+      wy.communicate< Face, Cell >( level );
+      wy.communicate< Edge, Cell >( level );
+      wy.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      // Sync the coefficient functions (direction LOW2HIGH) before their face data is read below.
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_diffusivity_times_deltaVertex =
+             face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_diffusivity_times_deltaEdge =
+             face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wxEdge   = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wyEdge   = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+         // Resolve the blending map once per face: the cast result is checked and then reused for
+         // every geometry parameter instead of repeating the dynamic cast for each read.
+         const std::shared_ptr< AnnulusMap > annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap,
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         // Argument order must match the declaration of the generated kernel.
+         toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D(
+
+             _data_diffusivity_times_deltaEdge,
+             _data_diffusivity_times_deltaVertex,
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             _data_wxEdge,
+             _data_wxVertex,
+             _data_wyEdge,
+             _data_wyVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+/// (Re-)computes the inverse diagonal values for all levels from minLevel_ to maxLevel_.
+///
+/// The result is kept in invDiag_, which is allocated on the first call. Per level, invDiag_ is set to
+/// zero, the generated kernel is run on every macro-face with the invDiag_ face data as output, the
+/// face data is communicated additively to edges and vertices, and the values are inverted elementwise.
+///
+/// Storages with global cells are not supported; that path aborts with "Not implemented.".
+void P2ElementwiseSupgDiffusionAnnulusMap::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         diffusivity_times_delta.communicate< Face, Cell >( level );
+         diffusivity_times_delta.communicate< Edge, Cell >( level );
+         diffusivity_times_delta.communicate< Vertex, Cell >( level );
+         wx.communicate< Face, Cell >( level );
+         wx.communicate< Edge, Cell >( level );
+         wx.communicate< Vertex, Cell >( level );
+         wy.communicate< Face, Cell >( level );
+         wy.communicate< Edge, Cell >( level );
+         wy.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         // No kernel exists for this case. invDiag_ was just zeroed, so there is nothing to invert here.
+         WALBERLA_ABORT( "Not implemented." );
+      }
+      else
+      {
+         // Sync the coefficient functions (direction LOW2HIGH) before their face data is read below.
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the actual numerical data
+            real_t* _data_invDiag_Vertex = face.getData( invDiag_->getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge   = face.getData( invDiag_->getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_diffusivity_times_deltaVertex =
+                face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_diffusivity_times_deltaEdge =
+                face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wxEdge   = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wyEdge   = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+            // Resolve the blending map once per face: the cast result is checked and then reused for
+            // every geometry parameter instead of repeating the dynamic cast for each read.
+            const std::shared_ptr< AnnulusMap > annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+            WALBERLA_CHECK_NOT_NULLPTR(
+                annulusMap,
+                "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+            const real_t radRefVertex = annulusMap->radRefVertex();
+            const real_t radRayVertex = annulusMap->radRayVertex();
+            const real_t refVertex_0  = annulusMap->refVertex()[0];
+            const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+            const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+            const real_t refVertex_1  = annulusMap->refVertex()[1];
+            const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+            const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+            this->timingTree_->start( "kernel" );
+
+            // Argument order must match the declaration of the generated kernel.
+            computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D(
+
+                _data_diffusivity_times_deltaEdge,
+                _data_diffusivity_times_deltaVertex,
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_wxEdge,
+                _data_wxVertex,
+                _data_wyEdge,
+                _data_wyVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float,
+                radRayVertex,
+                radRefVertex,
+                rayVertex_0,
+                rayVertex_1,
+                refVertex_0,
+                refVertex_1,
+                thrVertex_0,
+                thrVertex_1 );
+
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         //
+         this->timingTree_->start( "post-communication" );
+         // Note: This communication could be avoided by also running the diagonal computation on the
+         //       respective lower dimensional primitives!
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         invDiag_->getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+
+         // Only now do all primitives hold the complete sums, so the inversion has to come last.
+         invDiag_->invertElementwise( level );
+      }
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+/// Hands out the shared handle stored in invDiag_.
+/// NOTE(review): invDiag_ is allocated in computeInverseDiagonalOperatorValues(); whether it can still be
+/// nullptr before that call depends on the constructor, which is not visible here - confirm.
+std::shared_ptr< P2Function< real_t > > P2ElementwiseSupgDiffusionAnnulusMap::getInverseDiagonalValues() const
+{
+   return this->invDiag_;
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.hpp b/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..62a4d379c3898759d16be34ead8b2a841e53528c
--- /dev/null
+++ b/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.hpp
@@ -0,0 +1,197 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Generated elementwise operator mapping P2 functions to P2 functions (see the Operator base class
+/// template arguments), which additionally provides inverse diagonal values.
+///
+/// The private kernels below are documented as 2D triangle kernels with AnnulusMap blending and a
+/// Dunavant 3 quadrature rule. Besides src/dst, the kernels receive the data of three P2 coefficient
+/// functions (diffusivity_times_delta, wx, wy), which are held as members of this class.
+class P2ElementwiseSupgDiffusionAnnulusMap : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                             public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   /// NOTE(review): the constructor definition is not part of this header; the descriptions below are
+   /// inferred from the parameter names and from how the members are used - confirm.
+   ///
+   /// \param storage                   primitive storage the operator works on
+   /// \param minLevel                  presumably the lowest refinement level the operator is used on
+   /// \param maxLevel                  presumably the highest refinement level the operator is used on
+   /// \param _diffusivity_times_delta  coefficient function, presumably stored in diffusivity_times_delta
+   /// \param _wx                       coefficient function, presumably stored in wx
+   /// \param _wy                       coefficient function, presumably stored in wy
+   P2ElementwiseSupgDiffusionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,
+                                         size_t                                     minLevel,
+                                         size_t                                     maxLevel,
+                                         const P2Function< real_t >&                _diffusivity_times_delta,
+                                         const P2Function< real_t >&                _wx,
+                                         const P2Function< real_t >&                _wy );
+
+   /// Applies the operator to \p src and writes into \p dst on the given \p level.
+   ///
+   /// After the per-face kernel calls, the dst data is communicated additively from faces to edges and
+   /// vertices; \p flag enters that communication as DoFType::All ^ flag and \p updateType as
+   /// updateType == Replace.
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+
+   /// Assembles the operator into \p mat using the idx_t enumerations \p src and \p dst on \p level.
+   ///
+   /// \p flag is ignored (a warning is logged on root if it differs from All). Storages with global
+   /// cells abort with "Not implemented.".
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+   /// Computes the inverse diagonal values on all levels and stores them in invDiag_ (allocated on the
+   /// first call). Storages with global cells abort with "Not implemented.".
+   void computeInverseDiagonalOperatorValues();
+
+   /// Returns the shared handle stored in invDiag_.
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const;
+
+ protected:
+ private:
+   /// Integral: P2ElementwiseSupgDiffusionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///   1140    1756      20      12      0              0                 0              1
+   void apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_diffusivity_times_deltaEdge,
+                                                             real_t* RESTRICT _data_diffusivity_times_deltaVertex,
+                                                             real_t* RESTRICT _data_dstEdge,
+                                                             real_t* RESTRICT _data_dstVertex,
+                                                             real_t* RESTRICT _data_srcEdge,
+                                                             real_t* RESTRICT _data_srcVertex,
+                                                             real_t* RESTRICT _data_wxEdge,
+                                                             real_t* RESTRICT _data_wxVertex,
+                                                             real_t* RESTRICT _data_wyEdge,
+                                                             real_t* RESTRICT _data_wyVertex,
+                                                             real_t           macro_vertex_coord_id_0comp0,
+                                                             real_t           macro_vertex_coord_id_0comp1,
+                                                             real_t           macro_vertex_coord_id_1comp0,
+                                                             real_t           macro_vertex_coord_id_1comp1,
+                                                             real_t           macro_vertex_coord_id_2comp0,
+                                                             real_t           macro_vertex_coord_id_2comp1,
+                                                             int64_t          micro_edges_per_macro_edge,
+                                                             real_t           micro_edges_per_macro_edge_float,
+                                                             real_t           radRayVertex,
+                                                             real_t           radRefVertex,
+                                                             real_t           rayVertex_0,
+                                                             real_t           rayVertex_1,
+                                                             real_t           refVertex_0,
+                                                             real_t           refVertex_1,
+                                                             real_t           thrVertex_0,
+                                                             real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseSupgDiffusionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///   1104    1720      20      12      0              0                 0              4
+   void toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_diffusivity_times_deltaEdge,
+                                                                real_t* RESTRICT _data_diffusivity_times_deltaVertex,
+                                                                idx_t* RESTRICT  _data_dstEdge,
+                                                                idx_t* RESTRICT  _data_dstVertex,
+                                                                idx_t* RESTRICT  _data_srcEdge,
+                                                                idx_t* RESTRICT  _data_srcVertex,
+                                                                real_t* RESTRICT _data_wxEdge,
+                                                                real_t* RESTRICT _data_wxVertex,
+                                                                real_t* RESTRICT _data_wyEdge,
+                                                                real_t* RESTRICT _data_wyVertex,
+                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                std::shared_ptr< SparseMatrixProxy > mat,
+                                                                int64_t                              micro_edges_per_macro_edge,
+                                                                real_t micro_edges_per_macro_edge_float,
+                                                                real_t radRayVertex,
+                                                                real_t radRefVertex,
+                                                                real_t rayVertex_0,
+                                                                real_t rayVertex_1,
+                                                                real_t refVertex_0,
+                                                                real_t refVertex_1,
+                                                                real_t thrVertex_0,
+                                                                real_t thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseSupgDiffusionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    990    1600      20      12      0              0                 0              1
+   void computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D(
+       real_t* RESTRICT _data_diffusivity_times_deltaEdge,
+       real_t* RESTRICT _data_diffusivity_times_deltaVertex,
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_wxEdge,
+       real_t* RESTRICT _data_wxVertex,
+       real_t* RESTRICT _data_wyEdge,
+       real_t* RESTRICT _data_wyVertex,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1 ) const;
+
+   // Inverse diagonal values; allocated on demand by computeInverseDiagonalOperatorValues().
+   std::shared_ptr< P2Function< real_t > > invDiag_;
+   // Coefficient functions whose vertex/edge DoF data is passed to every kernel call.
+   // NOTE(review): wx / wy are presumably the velocity components (operators.toml lists a
+   // velocity_function_space for this form) - confirm against the generator.
+   P2Function< real_t >                    diffusivity_times_delta;
+   P2Function< real_t >                    wx;
+   P2Function< real_t >                    wy;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..24934c34ccb1413b6f7370788f96488c88c6b4e6
--- /dev/null
+++ b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,1331 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusionAnnulusMap::apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_38 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38;
+       const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+       const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_42 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_43 = tmp_qloop_41 + tmp_qloop_42;
+       const real_t tmp_qloop_44 = jac_affine_inv_0_0_GRAY*tmp_qloop_43 + jac_affine_inv_1_0_GRAY*tmp_qloop_43;
+       const real_t tmp_qloop_45 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+       const real_t tmp_qloop_46 = jac_affine_inv_0_1_GRAY*tmp_qloop_43 + jac_affine_inv_1_1_GRAY*tmp_qloop_43;
+       const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_37;
+       const real_t tmp_qloop_84 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0;
+       const real_t tmp_qloop_85 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0;
+       const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_38;
+       const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0;
+       const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0;
+       const real_t tmp_qloop_97 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_98 = jac_affine_inv_0_0_GRAY*tmp_qloop_97;
+       const real_t tmp_qloop_99 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+       const real_t tmp_qloop_100 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+       const real_t tmp_qloop_101 = tmp_qloop_100 + tmp_qloop_99;
+       const real_t tmp_qloop_102 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_103 = jac_affine_inv_0_1_GRAY*tmp_qloop_102;
+       const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+       const real_t tmp_qloop_112 = -tmp_qloop_37 - tmp_qloop_97;
+       const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_111;
+       const real_t tmp_qloop_114 = -tmp_qloop_102 - tmp_qloop_41;
+       const real_t tmp_qloop_115 = jac_affine_inv_1_0_GRAY*tmp_qloop_114 - tmp_qloop_99;
+       const real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_100;
+       const real_t tmp_qloop_117 = jac_affine_inv_1_1_GRAY*tmp_qloop_41;
+       const real_t tmp_qloop_118 = jac_affine_inv_1_1_GRAY*tmp_qloop_114 - tmp_qloop_117;
+       const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_38;
+       const real_t tmp_qloop_124 = jac_affine_inv_0_0_GRAY*tmp_qloop_123 - tmp_qloop_111;
+       const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_42;
+       const real_t tmp_qloop_126 = jac_affine_inv_0_0_GRAY*tmp_qloop_125 - tmp_qloop_100;
+       const real_t tmp_qloop_127 = jac_affine_inv_0_1_GRAY*tmp_qloop_123 - tmp_qloop_99;
+       const real_t tmp_qloop_128 = jac_affine_inv_0_1_GRAY*tmp_qloop_125 - tmp_qloop_117;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_47),tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_71);
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_48);
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_47);
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_69),tmp_qloop_71),tmp_qloop_74);
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,wx_dof_3),_mm256_mul_pd(tmp_qloop_72,wx_dof_1)),_mm256_mul_pd(tmp_qloop_75,wx_dof_2)),_mm256_mul_pd(tmp_qloop_76,wx_dof_4)),_mm256_mul_pd(tmp_qloop_77,wx_dof_5)),_mm256_mul_pd(tmp_qloop_78,wx_dof_0));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,wy_dof_3),_mm256_mul_pd(tmp_qloop_72,wy_dof_1)),_mm256_mul_pd(tmp_qloop_75,wy_dof_2)),_mm256_mul_pd(tmp_qloop_76,wy_dof_4)),_mm256_mul_pd(tmp_qloop_77,wy_dof_5)),_mm256_mul_pd(tmp_qloop_78,wy_dof_0));
+                   const __m256d tmp_qloop_86 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_47);
+                   const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_93 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_48);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_106 = _mm256_add_pd(tmp_qloop_104,tmp_qloop_105);
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_109 = _mm256_add_pd(tmp_qloop_107,tmp_qloop_108);
+                   const __m256d tmp_qloop_119 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_119,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_108,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_119,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_129,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)));
+                   const __m256d tmp_qloop_131 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_129,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_78),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_72)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_75)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_69)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_76)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_77))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_50),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_54))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_50),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_54)))));
+                   const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_87),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_88))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_87),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_88)))));
+                   const __m256d tmp_qloop_134 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_94),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_95))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_94),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_95)))));
+                   const __m256d tmp_qloop_135 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_106),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_109))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_106),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_109)))));
+                   const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_120),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_121))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_120),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_121)))));
+                   const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_130),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_131))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_130),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_131)))));
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_60 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_51),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_52));
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_51),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_52));
+                   const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_60),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_61));
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_61));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_57));
+                   const __m256d tmp_qloop_59 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_65));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_65));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_44,tmp_qloop_44,tmp_qloop_44,tmp_qloop_44)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_44,tmp_qloop_44,tmp_qloop_44,tmp_qloop_44))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_45,tmp_qloop_45,tmp_qloop_45,tmp_qloop_45)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_45,tmp_qloop_45,tmp_qloop_45,tmp_qloop_45)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_67))));
+                   const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_83,tmp_qloop_83,tmp_qloop_83,tmp_qloop_83)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_85,tmp_qloop_85,tmp_qloop_85,tmp_qloop_85)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_83,tmp_qloop_83,tmp_qloop_83,tmp_qloop_83)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_85,tmp_qloop_85,tmp_qloop_85,tmp_qloop_85))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_83,tmp_qloop_83,tmp_qloop_83,tmp_qloop_83)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_83,tmp_qloop_83,tmp_qloop_83,tmp_qloop_83)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_63,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_88))));
+                   const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_92,tmp_qloop_92,tmp_qloop_92,tmp_qloop_92)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_92,tmp_qloop_92,tmp_qloop_92,tmp_qloop_92))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_91,tmp_qloop_91,tmp_qloop_91,tmp_qloop_91))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_91,tmp_qloop_91,tmp_qloop_91,tmp_qloop_91))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_63,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_95))));
+                   const __m256d tmp_qloop_110 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_101,tmp_qloop_101,tmp_qloop_101,tmp_qloop_101)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_101,tmp_qloop_101,tmp_qloop_101,tmp_qloop_101)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_101,tmp_qloop_101,tmp_qloop_101,tmp_qloop_101)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_98,tmp_qloop_98,tmp_qloop_98,tmp_qloop_98))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_101,tmp_qloop_101,tmp_qloop_101,tmp_qloop_101)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_98,tmp_qloop_98,tmp_qloop_98,tmp_qloop_98))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_67))));
+                   const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_113,tmp_qloop_113,tmp_qloop_113,tmp_qloop_113)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_115,tmp_qloop_115,tmp_qloop_115,tmp_qloop_115)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_113,tmp_qloop_113,tmp_qloop_113,tmp_qloop_113)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_115,tmp_qloop_115,tmp_qloop_115,tmp_qloop_115))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_116,tmp_qloop_116,tmp_qloop_116,tmp_qloop_116)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_118,tmp_qloop_118,tmp_qloop_118,tmp_qloop_118))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_116,tmp_qloop_116,tmp_qloop_116,tmp_qloop_116)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_118,tmp_qloop_118,tmp_qloop_118,tmp_qloop_118))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_67))));
+                   const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_124,tmp_qloop_124,tmp_qloop_124,tmp_qloop_124)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_126,tmp_qloop_126,tmp_qloop_126,tmp_qloop_126)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_124,tmp_qloop_124,tmp_qloop_124,tmp_qloop_124)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_126,tmp_qloop_126,tmp_qloop_126,tmp_qloop_126))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_127,tmp_qloop_127,tmp_qloop_127,tmp_qloop_127)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_128,tmp_qloop_128,tmp_qloop_128,tmp_qloop_128))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_127,tmp_qloop_127,tmp_qloop_127,tmp_qloop_127)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_128,tmp_qloop_128,tmp_qloop_128,tmp_qloop_128))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_67))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_68,tmp_qloop_82);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_82,tmp_qloop_89);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_82,tmp_qloop_96);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_82);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_82);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_82);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_68);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_89);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_96);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_133);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_133);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_133);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_68);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_89);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_96);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_134);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_134);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_134);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_68);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_89);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_96);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_135);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_135);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_135);
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_68);
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_89);
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_96);
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_136);
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_136);
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_136);
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_68);
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_89);
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_96);
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137);
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_137);
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // scalar loop: ctr_0 runs from ((bound / 4) * 4) up to bound = micro_edges_per_macro_edge - ctr_1, one element per iteration; presumably the tail left over by the 4-wide __m256d loop above -- confirm against that loop's bounds
+             {
+            // Loop-counter arrays have 4 entries, but only entry 0 is read below because ctr_0 - phantom_ctr_0 is always 0 in this loop.
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; // the cast binds to ctr_0 before the addition
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            // p_affine_<v>_<c>: component c of vertex v of the current micro element = macro vertex 0 + (ctr_0 [+1]) / micro_edges * (macro vertex 1 - macro vertex 0) + (ctr_1 [+1]) / micro_edges * (macro vertex 2 - macro vertex 0).
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // src_dof_0..2 are read from _data_srcVertex, src_dof_3..5 from _data_srcEdge
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // the coefficient dofs use the same six index expressions as src_dof_0..5
+                const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // wx/wy dofs: same index pattern again, read from the wx*/wy* arrays
+                const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // q_acc_<i>_<j>: 6x6 accumulators, zeroed per element and summed over the quadrature loop below
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // 4 quadrature points; point coordinates come from _data_q_p_0/_data_q_p_1 and weights from _data_q_w
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; // component 0 of the quadrature point mapped into the micro element
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; // component 1 of the mapped quadrature point
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // squared distance of the mapped point from the origin
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); // reciprocal of that distance
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; // tmp_qloop_1 and tmp_qloop_7..12 are defined outside this loop (not visible in this chunk)
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); // radRayVertex and rayVertex_0/1 also come from outside this loop
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0;
+                   const real_t tmp_qloop_50 = jac_affine_inv_0_0_GRAY*tmp_qloop_49 + jac_affine_inv_1_0_GRAY*tmp_qloop_49; // the jac_affine_inv_*_GRAY values are defined outside this loop
+                   const real_t tmp_qloop_54 = jac_affine_inv_0_1_GRAY*tmp_qloop_49 + jac_affine_inv_1_1_GRAY*tmp_qloop_49;
+                   const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q]; // with x = _data_q_p_0[q], y = _data_q_p_1[q]: 4*x*y
+                   const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_71 = tmp_qloop_70*2.0;
+                   const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q]; // 2*x*x - x
+                   const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_74 = tmp_qloop_73*2.0;
+                   const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q]; // 2*y*y - y
+                   const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0; // 4*y - 4*x*y - 4*y*y
+                   const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0; // 4*x - 4*x*y - 4*x*x
+                   const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; // 4*x*y + 2*x*x + 2*y*y - 3*x - 3*y + 1
+                   const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0; // wx dofs weighted by the six polynomials above
+                   const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0; // wy dofs weighted the same way
+                   const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0;
+                   const real_t tmp_qloop_87 = jac_affine_inv_0_0_GRAY*tmp_qloop_86;
+                   const real_t tmp_qloop_88 = jac_affine_inv_0_1_GRAY*tmp_qloop_86;
+                   const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0;
+                   const real_t tmp_qloop_94 = jac_affine_inv_1_0_GRAY*tmp_qloop_93;
+                   const real_t tmp_qloop_95 = jac_affine_inv_1_1_GRAY*tmp_qloop_93;
+                   const real_t tmp_qloop_104 = jac_affine_inv_1_0_GRAY*tmp_qloop_47;
+                   const real_t tmp_qloop_105 = jac_affine_inv_0_0_GRAY*tmp_qloop_48;
+                   const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105;
+                   const real_t tmp_qloop_107 = jac_affine_inv_1_1_GRAY*tmp_qloop_47;
+                   const real_t tmp_qloop_108 = jac_affine_inv_0_1_GRAY*tmp_qloop_48;
+                   const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108;
+                   const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_120 = jac_affine_inv_1_0_GRAY*tmp_qloop_119 - tmp_qloop_105;
+                   const real_t tmp_qloop_121 = jac_affine_inv_1_1_GRAY*tmp_qloop_119 - tmp_qloop_108;
+                   const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_130 = jac_affine_inv_0_0_GRAY*tmp_qloop_129 - tmp_qloop_104;
+                   const real_t tmp_qloop_131 = jac_affine_inv_0_1_GRAY*tmp_qloop_129 - tmp_qloop_107;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; // jac_blending_<r>_<c>: four entries of a 2x2 matrix evaluated at the mapped point
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; // determinant of that 2x2 matrix
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21; // NOTE(review): despite the name, this is the signed determinant; no absolute value is taken here
+                   const real_t tmp_qloop_81 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q]; // quadrature weight times both determinant factors times the coefficient dofs weighted by the six polynomials
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; // inverse of the 2x2 matrix: swapped diagonal and negated off-diagonal, each divided by the determinant
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54)); // tmp_qloop_82 and 133..137 become the row factors of q_tmp_<i>_<j> for i = 0..5
+                   const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88));
+                   const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95));
+                   const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109));
+                   const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121));
+                   const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131));
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; // hessian_blending_<a>_<b>_<c>: eight terms, presumably second derivatives of the blending map -- index convention not visible here, confirm
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; // tmp_qloop_51..67: negated hessian terms combined with the inverse matrix entries
+                   const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52;
+                   const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52;
+                   const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                   const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61;
+                   const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57;
+                   const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57;
+                   const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                   const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                   const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                   const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_44) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_44) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_45 + jac_blending_inv_1_0*tmp_qloop_46) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_45 + jac_blending_inv_1_1*tmp_qloop_46) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67); // tmp_qloop_68, 89, 96, 110, 122, 132 become the column factors of q_tmp_<i>_<j> for j = 0..5; tmp_qloop_40..46, 83..85, 90..92, 98..103, 113..118, 124..128 are defined outside this loop
+                   const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_83) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_83) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_83 + jac_blending_inv_1_0*tmp_qloop_85) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_83 + jac_blending_inv_1_1*tmp_qloop_85) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88);
+                   const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95);
+                   const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_98 + jac_blending_inv_1_0*tmp_qloop_101) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_98 + jac_blending_inv_1_1*tmp_qloop_101) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_101 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_101 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67);
+                   const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_113 + jac_blending_inv_1_0*tmp_qloop_115) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_113 + jac_blending_inv_1_1*tmp_qloop_115) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_116 + jac_blending_inv_1_0*tmp_qloop_118) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_116 + jac_blending_inv_1_1*tmp_qloop_118) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67);
+                   const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_126) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_126) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_127 + jac_blending_inv_1_0*tmp_qloop_128) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_127 + jac_blending_inv_1_1*tmp_qloop_128) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67);
+                   const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82; // q_tmp_<i>_<j> = (row factor i) * (column factor j) at this quadrature point
+                   const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89;
+                   const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96;
+                   const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82;
+                   const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82;
+                   const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82;
+                   const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68;
+                   const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89;
+                   const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96;
+                   const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133;
+                   const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133;
+                   const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133;
+                   const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68;
+                   const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89;
+                   const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96;
+                   const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134;
+                   const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134;
+                   const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134;
+                   const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68;
+                   const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89;
+                   const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96;
+                   const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135;
+                   const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135;
+                   const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135;
+                   const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68;
+                   const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89;
+                   const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96;
+                   const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136;
+                   const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136;
+                   const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136;
+                   const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68;
+                   const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89;
+                   const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96;
+                   const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137;
+                   const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137;
+                   const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // add this quadrature point's contribution to each of the 36 accumulators
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; // elMatVec_<i> = sum over j of q_acc_<i>_<j> * src_dof_<j>
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // read-modify-write: each result is added to the existing dst value, at the same indices used for the src dofs
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // Affine geometry for FaceType.BLUE: this is the step 1/micro_edges_per_macro_edge_float used to scale the macro-vertex differences below.
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // component 0 of: vertex 0 + one step toward vertex 1
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // component 1 of: vertex 0 + one step toward vertex 1
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // component 0 of one step along (vertex 2 - vertex 0)
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // component 1 of one step along (vertex 2 - vertex 0)
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = vertex 0 + step toward vertex 1 (the values the per-element p_affine_* formulas in the BLUE loop take at ctr_0 = ctr_1 = 0)
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = vertex 0 + step toward vertex 2
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = vertex 0 + step toward vertex 1 + step toward vertex 2
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // Jacobian column 0 = point 1 - point 0 (component 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; // Jacobian column 1 = point 2 - point 0 (component 0)
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; // Jacobian column 0 = point 1 - point 0 (component 1)
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; // Jacobian column 1 = point 2 - point 0 (component 1)
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal determinant; no guard against a zero determinant here — NOTE(review): presumably the caller guarantees a non-degenerate macro face, confirm
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse Jacobian: swapped diagonal and negated off-diagonal entries, each scaled by 1/det
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // |det J|; NOTE(review): unqualified abs on a real_t — confirm it resolves to a floating-point overload (e.g. std::abs via <cmath>) and not int abs(int)
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; // tmp_moved_constant_0..36: scalars built only from the jac_affine_inv_*_BLUE entries and numeric literals, evaluated once here ahead of the FaceType.BLUE loops; NOTE(review): presumably consumed inside the quadrature loop — confirm
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0;
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; // 4*(inv_0_0 + inv_1_0)
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5; // 4*(inv_0_1 + inv_1_1)
+       const real_t tmp_moved_constant_7 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0; // 4 * inv_0_0 squared
+       const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; // 4 * inv_0_1 squared
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0; // 4 * inv_1_0 squared
+       const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; // 4 * inv_1_1 squared
+       const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16;
+       const real_t tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19;
+       const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21;
+       const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16; // -(4*inv_0_0 + 8*inv_1_0)
+       const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_26 = -tmp_moved_constant_21 - tmp_moved_constant_4; // -(8*inv_1_1 + 4*inv_0_1)
+       const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_28 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_29 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_4;
+       const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29;
+       const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1; // -(8*inv_0_0 + 4*inv_1_0)
+       const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5; // -(8*inv_0_1 + 4*inv_1_1)
+       const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_47),tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_71);
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_48);
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_47);
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_69),tmp_qloop_71),tmp_qloop_74);
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,wx_dof_3),_mm256_mul_pd(tmp_qloop_72,wx_dof_1)),_mm256_mul_pd(tmp_qloop_75,wx_dof_2)),_mm256_mul_pd(tmp_qloop_76,wx_dof_4)),_mm256_mul_pd(tmp_qloop_77,wx_dof_5)),_mm256_mul_pd(tmp_qloop_78,wx_dof_0));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,wy_dof_3),_mm256_mul_pd(tmp_qloop_72,wy_dof_1)),_mm256_mul_pd(tmp_qloop_75,wy_dof_2)),_mm256_mul_pd(tmp_qloop_76,wy_dof_4)),_mm256_mul_pd(tmp_qloop_77,wy_dof_5)),_mm256_mul_pd(tmp_qloop_78,wy_dof_0));
+                   const __m256d tmp_qloop_86 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_47);
+                   const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_93 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_48);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_106 = _mm256_add_pd(tmp_qloop_104,tmp_qloop_105);
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_109 = _mm256_add_pd(tmp_qloop_107,tmp_qloop_108);
+                   const __m256d tmp_qloop_119 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_119,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_108,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_119,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_129,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)));
+                   const __m256d tmp_qloop_131 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_129,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_78),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_72)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_75)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_69)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_76)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_77))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_50),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_54))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_50),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_54)))));
+                   const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_87),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_88))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_87),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_88)))));
+                   const __m256d tmp_qloop_134 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_94),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_95))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_94),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_95)))));
+                   const __m256d tmp_qloop_135 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_106),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_109))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_106),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_109)))));
+                   const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_120),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_121))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_120),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_121)))));
+                   const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_130),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_131))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_130),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_131)))));
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_60 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_51),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_52));
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_51),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_52));
+                   const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_60),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_61));
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_61));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_57));
+                   const __m256d tmp_qloop_59 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_65));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_65));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_67))));
+                   const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_63,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_88)))
);
+                   const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_63,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_95)))
);
+                   const __m256d tmp_qloop_110 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_109,tmp_ql
oop_67))));
+                   const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_121,tmp_ql
oop_67))));
+                   const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_131,tmp_ql
oop_67))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_68,tmp_qloop_82);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_82,tmp_qloop_89);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_82,tmp_qloop_96);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_82);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_82);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_82);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_68);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_89);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_96);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_133);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_133);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_133);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_68);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_89);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_96);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_134);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_134);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_134);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_68);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_89);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_96);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_135);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_135);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_135);
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_68);
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_89);
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_96);
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_136);
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_136);
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_136);
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_68);
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_89);
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_96);
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137);
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_137);
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0;
+                   const real_t tmp_qloop_50 = jac_affine_inv_0_0_BLUE*tmp_qloop_49 + jac_affine_inv_1_0_BLUE*tmp_qloop_49;
+                   const real_t tmp_qloop_54 = jac_affine_inv_0_1_BLUE*tmp_qloop_49 + jac_affine_inv_1_1_BLUE*tmp_qloop_49;
+                   const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q];
+                   const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_71 = tmp_qloop_70*2.0;
+                   const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q];
+                   const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_74 = tmp_qloop_73*2.0;
+                   const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q];
+                   const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0;
+                   const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0;
+                   const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0;
+                   const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0;
+                   const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0;
+                   const real_t tmp_qloop_87 = jac_affine_inv_0_0_BLUE*tmp_qloop_86;
+                   const real_t tmp_qloop_88 = jac_affine_inv_0_1_BLUE*tmp_qloop_86;
+                   const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0;
+                   const real_t tmp_qloop_94 = jac_affine_inv_1_0_BLUE*tmp_qloop_93;
+                   const real_t tmp_qloop_95 = jac_affine_inv_1_1_BLUE*tmp_qloop_93;
+                   const real_t tmp_qloop_104 = jac_affine_inv_1_0_BLUE*tmp_qloop_47;
+                   const real_t tmp_qloop_105 = jac_affine_inv_0_0_BLUE*tmp_qloop_48;
+                   const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105;
+                   const real_t tmp_qloop_107 = jac_affine_inv_1_1_BLUE*tmp_qloop_47;
+                   const real_t tmp_qloop_108 = jac_affine_inv_0_1_BLUE*tmp_qloop_48;
+                   const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108;
+                   const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_120 = jac_affine_inv_1_0_BLUE*tmp_qloop_119 - tmp_qloop_105;
+                   const real_t tmp_qloop_121 = jac_affine_inv_1_1_BLUE*tmp_qloop_119 - tmp_qloop_108;
+                   const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_130 = jac_affine_inv_0_0_BLUE*tmp_qloop_129 - tmp_qloop_104;
+                   const real_t tmp_qloop_131 = jac_affine_inv_0_1_BLUE*tmp_qloop_129 - tmp_qloop_107;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21;
+                   const real_t tmp_qloop_81 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q];
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54));
+                   const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88));
+                   const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95));
+                   const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109));
+                   const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121));
+                   const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131));
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52;
+                   const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52;
+                   const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                   const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61;
+                   const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57;
+                   const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57;
+                   const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                   const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                   const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                   const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67);
+                   const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88);
+                   const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95);
+                   const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67);
+                   const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67);
+                   const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67);
+                   const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82;
+                   const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89;
+                   const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96;
+                   const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82;
+                   const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82;
+                   const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82;
+                   const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68;
+                   const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89;
+                   const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96;
+                   const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133;
+                   const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133;
+                   const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133;
+                   const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68;
+                   const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89;
+                   const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96;
+                   const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134;
+                   const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134;
+                   const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134;
+                   const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68;
+                   const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89;
+                   const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96;
+                   const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135;
+                   const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135;
+                   const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135;
+                   const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68;
+                   const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89;
+                   const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96;
+                   const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136;
+                   const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136;
+                   const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136;
+                   const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68;
+                   const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89;
+                   const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96;
+                   const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137;
+                   const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137;
+                   const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e678660437ec9b75877f6770b0b7687a0aa7f532
--- /dev/null
+++ b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,899 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       const real_t tmp_qloop_54 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_55 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_56 = tmp_qloop_54 + tmp_qloop_55;
+       const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56 + jac_affine_inv_1_0_GRAY*tmp_qloop_56;
+       const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_59 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_60 = tmp_qloop_58 + tmp_qloop_59;
+       const real_t tmp_qloop_61 = jac_affine_inv_0_0_GRAY*tmp_qloop_60 + jac_affine_inv_1_0_GRAY*tmp_qloop_60;
+       const real_t tmp_qloop_62 = jac_affine_inv_0_1_GRAY*tmp_qloop_56 + jac_affine_inv_1_1_GRAY*tmp_qloop_56;
+       const real_t tmp_qloop_63 = jac_affine_inv_0_1_GRAY*tmp_qloop_60 + jac_affine_inv_1_1_GRAY*tmp_qloop_60;
+       const real_t tmp_qloop_84 = jac_affine_inv_0_1_GRAY*tmp_qloop_54;
+       const real_t tmp_qloop_85 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0;
+       const real_t tmp_qloop_86 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0;
+       const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_55;
+       const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0;
+       const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0;
+       const real_t tmp_qloop_99 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_100 = jac_affine_inv_0_0_GRAY*tmp_qloop_99;
+       const real_t tmp_qloop_101 = jac_affine_inv_1_1_GRAY*tmp_qloop_54;
+       const real_t tmp_qloop_102 = jac_affine_inv_0_1_GRAY*tmp_qloop_55;
+       const real_t tmp_qloop_103 = tmp_qloop_101 + tmp_qloop_102;
+       const real_t tmp_qloop_104 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_105 = jac_affine_inv_0_1_GRAY*tmp_qloop_104;
+       const real_t tmp_qloop_109 = jac_affine_inv_1_0_GRAY*tmp_qloop_54;
+       const real_t tmp_qloop_110 = -tmp_qloop_54 - tmp_qloop_99;
+       const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_110 - tmp_qloop_109;
+       const real_t tmp_qloop_112 = -tmp_qloop_104 - tmp_qloop_58;
+       const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_101;
+       const real_t tmp_qloop_114 = jac_affine_inv_1_1_GRAY*tmp_qloop_110 - tmp_qloop_102;
+       const real_t tmp_qloop_115 = jac_affine_inv_1_1_GRAY*tmp_qloop_58;
+       const real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_115;
+       const real_t tmp_qloop_120 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_55;
+       const real_t tmp_qloop_121 = jac_affine_inv_0_0_GRAY*tmp_qloop_120 - tmp_qloop_109;
+       const real_t tmp_qloop_122 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_59;
+       const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*tmp_qloop_122 - tmp_qloop_102;
+       const real_t tmp_qloop_124 = jac_affine_inv_0_1_GRAY*tmp_qloop_120 - tmp_qloop_101;
+       const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*tmp_qloop_122 - tmp_qloop_115;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38);
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,wx_dof_3),_mm256_mul_pd(tmp_qloop_45,wx_dof_1)),_mm256_mul_pd(tmp_qloop_48,wx_dof_2)),_mm256_mul_pd(tmp_qloop_49,wx_dof_4)),_mm256_mul_pd(tmp_qloop_50,wx_dof_5)),_mm256_mul_pd(tmp_qloop_51,wx_dof_0));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,wy_dof_3),_mm256_mul_pd(tmp_qloop_45,wy_dof_1)),_mm256_mul_pd(tmp_qloop_48,wy_dof_2)),_mm256_mul_pd(tmp_qloop_49,wy_dof_4)),_mm256_mul_pd(tmp_qloop_50,wy_dof_5)),_mm256_mul_pd(tmp_qloop_51,wy_dof_0));
+                   const __m256d tmp_qloop_81 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38);
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_95 = _mm256_add_pd(tmp_qloop_93,tmp_qloop_94);
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(tmp_qloop_96,tmp_qloop_97);
+                   const __m256d tmp_qloop_106 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_107 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_94,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_108 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_97,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_117 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_118 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)));
+                   const __m256d tmp_qloop_119 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_65));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_65));
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_72),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_73));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_73));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_69));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_77));
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_77));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_57,tmp_qloop_57,tmp_qloop_57,tmp_qloop_57)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_61,tmp_qloop_61,tmp_qloop_61,tmp_qloop_61)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_57,tmp_qloop_57,tmp_qloop_57,tmp_qloop_57)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_61,tmp_qloop_61,tmp_qloop_61,tmp_qloop_61))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_62,tmp_qloop_62,tmp_qloop_62,tmp_qloop_62)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_63,tmp_qloop_63,tmp_qloop_63,tmp_qloop_63))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_62,tmp_qloop_62,tmp_qloop_62,tmp_qloop_62)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_63,tmp_qloop_63,tmp_qloop_63,tmp_qloop_63))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloo
p_78),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_79)))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_82),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_83))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_82),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_83))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_86,tmp_qloop_86,tmp_qloop_86,tmp_qloop_86)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_86,tmp_qloop_86,tmp_qloop_86,tmp_qloop_86))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_85,tmp_qloop_85,tmp_qloop_85,tmp_qloop_85))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_85,tmp_qloop_85,tmp_qloop_85,tmp_qloop_85))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloo
p_82),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_83)))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_88),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_89))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_88),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_89))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_92,tmp_qloop_92,tmp_qloop_92,tmp_qloop_92)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_92,tmp_qloop_92,tmp_qloop_92,tmp_qloop_92))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_91,tmp_qloop_91,tmp_qloop_91,tmp_qloop_91))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_91,tmp_qloop_91,tmp_qloop_91,tmp_qloop_91))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloo
p_88),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_89)))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_95),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_98))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_95),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_98))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_100,tmp_qloop_100,tmp_qloop_100,tmp_qloop_100)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_100,tmp_qloop_100,tmp_qloop_100,tmp_qloop_100)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_105,tmp_qloop_105,tmp_qloop_105,tmp_qloop_105))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_105,tmp_qloop_105,tmp_qloop_105,tmp_qloop_105))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm
256_mul_pd(tmp_qloop_78,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_98)))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_107),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_108))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_107),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_108))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_111,tmp_qloop_111,tmp_qloop_111,tmp_qloop_111)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_113,tmp_qloop_113,tmp_qloop_113,tmp_qloop_113)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_111,tmp_qloop_111,tmp_qloop_111,tmp_qloop_111)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_113,tmp_qloop_113,tmp_qloop_113,tmp_qloop_113))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_114,tmp_qloop_114,tmp_qloop_114,tmp_qloop_114)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_116,tmp_qloop_116,tmp_qloop_116,tmp_qloop_116))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_114,tmp_qloop_114,tmp_qloop_114,tmp_qloop_114)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_116,tmp_qloop_116,tmp_qloop_116,tmp_qloop_116))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_
add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_79)))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_118),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_119))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_118),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_119))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_121,tmp_qloop_121,tmp_qloop_121,tmp_qloop_121)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_123,tmp_qloop_123,tmp_qloop_123,tmp_qloop_123)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_121,tmp_qloop_121,tmp_qloop_121,tmp_qloop_121)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_123,tmp_qloop_123,tmp_qloop_123,tmp_qloop_123))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_124,tmp_qloop_124,tmp_qloop_124,tmp_qloop_124)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_125,tmp_qloop_125,tmp_qloop_125,tmp_qloop_125))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_124,tmp_qloop_124,tmp_qloop_124,tmp_qloop_124)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_125,tmp_qloop_125,tmp_qloop_125,tmp_qloop_125))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_
add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_79)))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             { // Scalar pass: starts at ((row length) / 4) * 4 and advances one micro element per iteration up to the end of the row.
+            // Loop counters as real_t. Only entry [ctr_0 - phantom_ctr_0] == [0] of each array is read below, since phantom_ctr_0 == ctr_0.
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            // Corner coordinates of the micro element at grid offsets (0,0), (1,0), (0,1) from (ctr_0, ctr_1), then the six nodal values (3 vertex + 3 edge) of each coefficient field.
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // Accumulators for the six diagonal entries, summed over the quadrature points.
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                { // One iteration per quadrature point (4 points); tmp_qloop_0 / tmp_qloop_3 are the point's coordinates after the affine map.
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // squared distance of the mapped quadrature point from the origin
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                   const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; // tmp_qloop_42/45/48/49/50/51 are the six quadratic weights used to interpolate nodal values at this point (see tmp_qloop_52, _53, _80)
+                   const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                   const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                   const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_42*wx_dof_3 + tmp_qloop_45*wx_dof_1 + tmp_qloop_48*wx_dof_2 + tmp_qloop_49*wx_dof_4 + tmp_qloop_50*wx_dof_5 + tmp_qloop_51*wx_dof_0;
+                   const real_t tmp_qloop_53 = tmp_qloop_42*wy_dof_3 + tmp_qloop_45*wy_dof_1 + tmp_qloop_48*wy_dof_2 + tmp_qloop_49*wy_dof_4 + tmp_qloop_50*wy_dof_5 + tmp_qloop_51*wy_dof_0;
+                   const real_t tmp_qloop_81 = tmp_qloop_37 - 1.0;
+                   const real_t tmp_qloop_82 = jac_affine_inv_0_0_GRAY*tmp_qloop_81;
+                   const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_81;
+                   const real_t tmp_qloop_87 = tmp_qloop_38 - 1.0;
+                   const real_t tmp_qloop_88 = jac_affine_inv_1_0_GRAY*tmp_qloop_87;
+                   const real_t tmp_qloop_89 = jac_affine_inv_1_1_GRAY*tmp_qloop_87;
+                   const real_t tmp_qloop_93 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_94 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                   const real_t tmp_qloop_95 = tmp_qloop_93 + tmp_qloop_94;
+                   const real_t tmp_qloop_96 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                   const real_t tmp_qloop_97 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                   const real_t tmp_qloop_98 = tmp_qloop_96 + tmp_qloop_97;
+                   const real_t tmp_qloop_106 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_107 = jac_affine_inv_1_0_GRAY*tmp_qloop_106 - tmp_qloop_94;
+                   const real_t tmp_qloop_108 = jac_affine_inv_1_1_GRAY*tmp_qloop_106 - tmp_qloop_97;
+                   const real_t tmp_qloop_117 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_118 = jac_affine_inv_0_0_GRAY*tmp_qloop_117 - tmp_qloop_93;
+                   const real_t tmp_qloop_119 = jac_affine_inv_0_1_GRAY*tmp_qloop_117 - tmp_qloop_96;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; // determinant of the 2x2 blending Jacobian
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); // reciprocal determinant, used for the adjugate-based inverse below
+                   const real_t abs_det_jac_blending = tmp_qloop_21; // NOTE(review): assigned without abs() despite the name - confirm the determinant is guaranteed positive for this map
+                   const real_t tmp_qloop_80 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_51 + diffusivity_times_delta_dof_1*tmp_qloop_45 + diffusivity_times_delta_dof_2*tmp_qloop_48 + diffusivity_times_delta_dof_3*tmp_qloop_42 + diffusivity_times_delta_dof_4*tmp_qloop_49 + diffusivity_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t tmp_qloop_64 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_72 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t tmp_qloop_68 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_76 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t tmp_qloop_65 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                   const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                   const real_t tmp_qloop_73 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                   const real_t tmp_qloop_74 = jac_blending_inv_0_0*tmp_qloop_72 + jac_blending_inv_0_1*tmp_qloop_73;
+                   const real_t tmp_qloop_75 = jac_blending_inv_1_0*tmp_qloop_72 + jac_blending_inv_1_1*tmp_qloop_73;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t tmp_qloop_69 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_70 = jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_0_1*tmp_qloop_69;
+                   const real_t tmp_qloop_71 = jac_blending_inv_1_0*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69;
+                   const real_t tmp_qloop_77 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                   const real_t tmp_qloop_78 = jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_0_1*tmp_qloop_77;
+                   const real_t tmp_qloop_79 = jac_blending_inv_1_0*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77;
+                   const real_t q_tmp_0_0 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_61) + jac_blending_inv_0_0*(tmp_qloop_40*tmp_qloop_66 + tmp_qloop_41*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_61) + jac_blending_inv_0_1*(tmp_qloop_40*tmp_qloop_70 + tmp_qloop_41*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + jac_blending_inv_1_0*(tmp_qloop_40*tmp_qloop_74 + tmp_qloop_41*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63) + jac_blending_inv_1_1*(tmp_qloop_40*tmp_qloop_78 + tmp_qloop_41*tmp_qloop_79));
+                   const real_t q_tmp_1_1 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_82 + jac_blending_inv_1_0*tmp_qloop_83) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_82 + jac_blending_inv_1_1*tmp_qloop_83))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_85 + jac_blending_inv_1_0*tmp_qloop_84) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_82 + tmp_qloop_67*tmp_qloop_83) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_85 + jac_blending_inv_1_1*tmp_qloop_84) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_82 + tmp_qloop_71*tmp_qloop_83) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_86) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_82 + tmp_qloop_75*tmp_qloop_83) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_86) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_82 + tmp_qloop_79*tmp_qloop_83));
+                   const real_t q_tmp_2_2 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_88 + jac_blending_inv_1_0*tmp_qloop_89) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_88 + jac_blending_inv_1_1*tmp_qloop_89))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_88 + tmp_qloop_67*tmp_qloop_89) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_88 + tmp_qloop_71*tmp_qloop_89) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_88 + tmp_qloop_75*tmp_qloop_89) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89));
+                   const real_t q_tmp_3_3 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_95 + jac_blending_inv_1_0*tmp_qloop_98) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_95 + jac_blending_inv_1_1*tmp_qloop_98))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_100 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_95 + tmp_qloop_67*tmp_qloop_98) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_100 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71*tmp_qloop_98) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_103 + jac_blending_inv_1_0*tmp_qloop_105) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_95 + tmp_qloop_75*tmp_qloop_98) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_103 + jac_blending_inv_1_1*tmp_qloop_105) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_95 + tmp_qloop_79*tmp_qloop_98));
+                   const real_t q_tmp_4_4 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_107 + jac_blending_inv_1_0*tmp_qloop_108) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_107 + jac_blending_inv_1_1*tmp_qloop_108))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_111 + jac_blending_inv_1_0*tmp_qloop_113) + jac_blending_inv_0_0*(tmp_qloop_107*tmp_qloop_66 + tmp_qloop_108*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_111 + jac_blending_inv_1_1*tmp_qloop_113) + jac_blending_inv_0_1*(tmp_qloop_107*tmp_qloop_70 + tmp_qloop_108*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_114 + jac_blending_inv_1_0*tmp_qloop_116) + jac_blending_inv_1_0*(tmp_qloop_107*tmp_qloop_74 + tmp_qloop_108*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_114 + jac_blending_inv_1_1*tmp_qloop_116) + jac_blending_inv_1_1*(tmp_qloop_107*tmp_qloop_78 + tmp_qloop_108*tmp_qloop_79));
+                   const real_t q_tmp_5_5 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_118 + jac_blending_inv_1_0*tmp_qloop_119) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_118 + jac_blending_inv_1_1*tmp_qloop_119))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_121 + jac_blending_inv_1_0*tmp_qloop_123) + jac_blending_inv_0_0*(tmp_qloop_118*tmp_qloop_66 + tmp_qloop_119*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_121 + jac_blending_inv_1_1*tmp_qloop_123) + jac_blending_inv_0_1*(tmp_qloop_118*tmp_qloop_70 + tmp_qloop_119*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_125) + jac_blending_inv_1_0*(tmp_qloop_118*tmp_qloop_74 + tmp_qloop_119*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_125) + jac_blending_inv_1_1*(tmp_qloop_118*tmp_qloop_78 + tmp_qloop_119*tmp_qloop_79));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // only the diagonal entries i_i are computed and accumulated
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0; // Element diagonal; added onto the existing _data_invDiag_Vertex / _data_invDiag_Edge entries below (no inversion is performed in this loop).
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // reciprocal of micro_edges_per_macro_edge_float; scales the macro edge vectors (v1 - v0) and (v2 - v0) below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // vertex 0: v0 plus one scaled (v1 - v0) step
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // vertex 1: v0 plus one scaled (v2 - v0) step
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // vertex 2: v0 plus one scaled step along both edge vectors
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 affine Jacobian: columns are (vertex 1 - vertex 0) and (vertex 2 - vertex 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the affine Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal determinant
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse Jacobian via the 2x2 adjugate formula
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t - confirm a floating-point overload (std::abs) is in scope so the C integer abs is not selected
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0;
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0;
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0;
+       const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0;
+       const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16;
+       const real_t tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19;
+       const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21;
+       const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16;
+       const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_26 = -tmp_moved_constant_21 - tmp_moved_constant_4;
+       const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_28 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_29 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_4;
+       const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29;
+       const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1;
+       const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5;
+       const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38);
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,wx_dof_3),_mm256_mul_pd(tmp_qloop_45,wx_dof_1)),_mm256_mul_pd(tmp_qloop_48,wx_dof_2)),_mm256_mul_pd(tmp_qloop_49,wx_dof_4)),_mm256_mul_pd(tmp_qloop_50,wx_dof_5)),_mm256_mul_pd(tmp_qloop_51,wx_dof_0));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,wy_dof_3),_mm256_mul_pd(tmp_qloop_45,wy_dof_1)),_mm256_mul_pd(tmp_qloop_48,wy_dof_2)),_mm256_mul_pd(tmp_qloop_49,wy_dof_4)),_mm256_mul_pd(tmp_qloop_50,wy_dof_5)),_mm256_mul_pd(tmp_qloop_51,wy_dof_0));
+                   const __m256d tmp_qloop_81 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38);
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_95 = _mm256_add_pd(tmp_qloop_93,tmp_qloop_94);
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(tmp_qloop_96,tmp_qloop_97);
+                   const __m256d tmp_qloop_106 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_107 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_94,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_108 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_97,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_117 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_118 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)));
+                   const __m256d tmp_qloop_119 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_65));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_65));
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_72),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_73));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_73));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_69));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_77));
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_77));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_70),_mm256
_mul_pd(tmp_qloop_41,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_79)))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_82),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_83))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_82),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_83))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(t
mp_qloop_70,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_83)))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_88),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_89))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_88),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_89))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(t
mp_qloop_70,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_89)))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_95),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_98))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_95),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_98))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(t
mp_qloop_70,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_98)))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_107),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_108))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_107),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_108))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mu
l_pd(tmp_qloop_107,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_79)))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_118),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_119))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_118),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_119))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mu
l_pd(tmp_qloop_118,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_79)))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                   const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                   const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                   const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_42*wx_dof_3 + tmp_qloop_45*wx_dof_1 + tmp_qloop_48*wx_dof_2 + tmp_qloop_49*wx_dof_4 + tmp_qloop_50*wx_dof_5 + tmp_qloop_51*wx_dof_0;
+                   const real_t tmp_qloop_53 = tmp_qloop_42*wy_dof_3 + tmp_qloop_45*wy_dof_1 + tmp_qloop_48*wy_dof_2 + tmp_qloop_49*wy_dof_4 + tmp_qloop_50*wy_dof_5 + tmp_qloop_51*wy_dof_0;
+                   const real_t tmp_qloop_81 = tmp_qloop_37 - 1.0;
+                   const real_t tmp_qloop_82 = jac_affine_inv_0_0_BLUE*tmp_qloop_81;
+                   const real_t tmp_qloop_83 = jac_affine_inv_0_1_BLUE*tmp_qloop_81;
+                   const real_t tmp_qloop_87 = tmp_qloop_38 - 1.0;
+                   const real_t tmp_qloop_88 = jac_affine_inv_1_0_BLUE*tmp_qloop_87;
+                   const real_t tmp_qloop_89 = jac_affine_inv_1_1_BLUE*tmp_qloop_87;
+                   const real_t tmp_qloop_93 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_94 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                   const real_t tmp_qloop_95 = tmp_qloop_93 + tmp_qloop_94;
+                   const real_t tmp_qloop_96 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                   const real_t tmp_qloop_97 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                   const real_t tmp_qloop_98 = tmp_qloop_96 + tmp_qloop_97;
+                   const real_t tmp_qloop_106 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_107 = jac_affine_inv_1_0_BLUE*tmp_qloop_106 - tmp_qloop_94;
+                   const real_t tmp_qloop_108 = jac_affine_inv_1_1_BLUE*tmp_qloop_106 - tmp_qloop_97;
+                   const real_t tmp_qloop_117 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_118 = jac_affine_inv_0_0_BLUE*tmp_qloop_117 - tmp_qloop_93;
+                   const real_t tmp_qloop_119 = jac_affine_inv_0_1_BLUE*tmp_qloop_117 - tmp_qloop_96;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21;
+                   const real_t tmp_qloop_80 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_51 + diffusivity_times_delta_dof_1*tmp_qloop_45 + diffusivity_times_delta_dof_2*tmp_qloop_48 + diffusivity_times_delta_dof_3*tmp_qloop_42 + diffusivity_times_delta_dof_4*tmp_qloop_49 + diffusivity_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t tmp_qloop_64 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_72 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t tmp_qloop_68 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_76 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t tmp_qloop_65 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                   const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                   const real_t tmp_qloop_73 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                   const real_t tmp_qloop_74 = jac_blending_inv_0_0*tmp_qloop_72 + jac_blending_inv_0_1*tmp_qloop_73;
+                   const real_t tmp_qloop_75 = jac_blending_inv_1_0*tmp_qloop_72 + jac_blending_inv_1_1*tmp_qloop_73;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t tmp_qloop_69 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                   const real_t tmp_qloop_70 = jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_0_1*tmp_qloop_69;
+                   const real_t tmp_qloop_71 = jac_blending_inv_1_0*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69;
+                   const real_t tmp_qloop_77 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                   const real_t tmp_qloop_78 = jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_0_1*tmp_qloop_77;
+                   const real_t tmp_qloop_79 = jac_blending_inv_1_0*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77;
+                   const real_t q_tmp_0_0 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_40*tmp_qloop_66 + tmp_qloop_41*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_40*tmp_qloop_70 + tmp_qloop_41*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_40*tmp_qloop_74 + tmp_qloop_41*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + jac_blending_inv_1_1*(tmp_qloop_40*tmp_qloop_78 + tmp_qloop_41*tmp_qloop_79));
+                   const real_t q_tmp_1_1 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_82 + jac_blending_inv_1_0*tmp_qloop_83) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_82 + jac_blending_inv_1_1*tmp_qloop_83))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_82 + tmp_qloop_67*tmp_qloop_83) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_82 + tmp_qloop_71*tmp_qloop_83) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_82 + tmp_qloop_75*tmp_qloop_83) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_82 + tmp_qloop_79*tmp_qloop_83));
+                   const real_t q_tmp_2_2 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_88 + jac_blending_inv_1_0*tmp_qloop_89) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_88 + jac_blending_inv_1_1*tmp_qloop_89))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_88 + tmp_qloop_67*tmp_qloop_89) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_88 + tmp_qloop_71*tmp_qloop_89) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_88 + tmp_qloop_75*tmp_qloop_89) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89));
+                   const real_t q_tmp_3_3 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_95 + jac_blending_inv_1_0*tmp_qloop_98) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_95 + jac_blending_inv_1_1*tmp_qloop_98))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_95 + tmp_qloop_67*tmp_qloop_98) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71*tmp_qloop_98) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_95 + tmp_qloop_75*tmp_qloop_98) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_95 + tmp_qloop_79*tmp_qloop_98));
+                   const real_t q_tmp_4_4 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_107 + jac_blending_inv_1_0*tmp_qloop_108) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_107 + jac_blending_inv_1_1*tmp_qloop_108))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_107*tmp_qloop_66 + tmp_qloop_108*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_107*tmp_qloop_70 + tmp_qloop_108*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_107*tmp_qloop_74 + tmp_qloop_108*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + jac_blending_inv_1_1*(tmp_qloop_107*tmp_qloop_78 + tmp_qloop_108*tmp_qloop_79));
+                   const real_t q_tmp_5_5 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_118 + jac_blending_inv_1_0*tmp_qloop_119) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_118 + jac_blending_inv_1_1*tmp_qloop_119))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_118*tmp_qloop_66 + tmp_qloop_119*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_118*tmp_qloop_70 + tmp_qloop_119*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_118*tmp_qloop_74 + tmp_qloop_119*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + jac_blending_inv_1_1*(tmp_qloop_118*tmp_qloop_78 + tmp_qloop_119*tmp_qloop_79));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..47499674a8b8e1e90de09f928f8db54e1f907445
--- /dev/null
+++ b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp
@@ -0,0 +1,954 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusion.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusion::apply_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_1 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1;
+       const real_t tmp_qloop_3 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_4 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_5 = tmp_qloop_3 + tmp_qloop_4;
+       const real_t tmp_qloop_6 = jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_0_1_GRAY*tmp_qloop_5 + jac_affine_inv_1_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_5;
+       const real_t tmp_qloop_24 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0;
+       const real_t tmp_qloop_25 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0;
+       const real_t tmp_qloop_26 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_27 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_28 = jac_affine_inv_0_0_GRAY*tmp_qloop_26 + jac_affine_inv_0_1_GRAY*tmp_qloop_27;
+       const real_t tmp_qloop_29 = jac_affine_inv_1_0_GRAY*tmp_qloop_0 + jac_affine_inv_1_1_GRAY*tmp_qloop_3;
+       const real_t tmp_qloop_30 = jac_affine_inv_1_0_GRAY*(-tmp_qloop_0 - tmp_qloop_26) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_27 - tmp_qloop_3) - tmp_qloop_29;
+       const real_t tmp_qloop_31 = jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_1) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_4) - tmp_qloop_29;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); // unaligned load of 4 consecutive vertex values starting at column ctr_0 of row ctr_1; row offset is ctr_1*(N+2) - ctr_1*(ctr_1+1)/2 with N = micro_edges_per_macro_edge
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); // same row, shifted by one entry (+1)
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); // same column, row ctr_1 + 1
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); // edge array, base index shifted by N*(N+1)/2
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); // edge array, base index shifted by 2*(N*(N+1)/2)
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); // edge array, unshifted base index
+                const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); // diffusivity_times_delta_dof_0..5 use the same six index expressions as src_dof_0..5
+                const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); // wx_dof_0..5 use the same six index expressions as src_dof_0..5
+                const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); // wy_dof_0..5 use the same six index expressions as src_dof_0..5
+                const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); // q_acc_i_j, i and j in 0..5: 36 four-lane accumulators, each started at zero
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1) // q indexes 4 entries of _data_q_p_0 / _data_q_p_1 / _data_q_w (presumably quadrature points and weights); below x = _data_q_p_0[q], y = _data_q_p_1[q], broadcast to all 4 lanes
+                {
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // 4x
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4y
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_7),tmp_qloop_8); // 4x + 4y - 3
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4xy
+                   const __m256d tmp_qloop_11 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // x^2
+                   const __m256d tmp_qloop_12 = _mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2x^2
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_12); // 2x^2 - x
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // y^2
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(2.0,2.0,2.0,2.0)); // 2y^2
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_15); // 2y^2 - y
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); // 4y - 4xy - 4y^2
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); // 4x - 4xy - 4x^2
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_10),tmp_qloop_12),tmp_qloop_15); // 1 - 3x - 3y + 4xy + 2x^2 + 2y^2
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wx_dof_3),_mm256_mul_pd(tmp_qloop_13,wx_dof_1)),_mm256_mul_pd(tmp_qloop_16,wx_dof_2)),_mm256_mul_pd(tmp_qloop_17,wx_dof_4)),_mm256_mul_pd(tmp_qloop_18,wx_dof_5)),_mm256_mul_pd(tmp_qloop_19,wx_dof_0)); // weighted sum of wx_dof_0..5 with weights tmp_qloop_19, 13, 16, 10, 17, 18 (presumably wx interpolated at (x, y))
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wy_dof_3),_mm256_mul_pd(tmp_qloop_13,wy_dof_1)),_mm256_mul_pd(tmp_qloop_16,wy_dof_2)),_mm256_mul_pd(tmp_qloop_17,wy_dof_4)),_mm256_mul_pd(tmp_qloop_18,wy_dof_5)),_mm256_mul_pd(tmp_qloop_19,wy_dof_0)); // same weighted sum for wy_dof_0..5
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_19),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_13)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_16)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_10)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_17)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_18)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); // (same weighted sum of diffusivity_times_delta_dof_0..5) * _data_q_w[q] * abs_det_jac_affine_GRAY
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))); // factor shared by all q_tmp_0_j: tmp_qloop_22 * (tmp_qloop_20*(tmp_qloop_9*inv_0_0 + tmp_qloop_9*inv_1_0) + tmp_qloop_21*(tmp_qloop_9*inv_0_1 + tmp_qloop_9*inv_1_1))
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7); // 4x - 1
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_32),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_32),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))); // factor shared by all q_tmp_1_j: tmp_qloop_22 * (tmp_qloop_20*(4x-1)*inv_0_0 + tmp_qloop_21*(4x-1)*inv_0_1)
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8); // 4y - 1
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_34),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_34),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))); // factor shared by all q_tmp_2_j: tmp_qloop_22 * (tmp_qloop_20*(4y-1)*inv_1_0 + tmp_qloop_21*(4y-1)*inv_1_1)
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); // 4x * inv_1_0
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); // 4y * inv_0_0
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); // 4x * inv_1_1
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); // 4y * inv_0_1
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(tmp_qloop_36,tmp_qloop_37)),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(tmp_qloop_38,tmp_qloop_39)))); // factor shared by all q_tmp_3_j
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 4x - 8y
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))); // factor shared by all q_tmp_4_j
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); // 4 - 8x - 4y
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))))); // factor shared by all q_tmp_5_j
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)); // q_tmp_i_j = (factor for index i) * (scalar for index j); the scalars tmp_qloop_6, 24, 25, 28, 30, 31 are not assigned inside this loop and are broadcast to all lanes
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); // add this iteration's q_tmp_i_j to the running sum q_acc_i_j (all 36 entries below)
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); // elMatVec_i = sum over j = 0..5 of q_acc_i_j * src_dof_j, computed lane-wise
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); // dst += elMatVec_0: load, add, store at the index expression used for src_dof_0
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); // this 4-wide range overlaps the previous one by 3 entries (index + 1), so it must run after the previous store
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); // dst += elMatVec_2 at the index expression used for src_dof_2
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); // dst += elMatVec_3 at the index expression used for src_dof_3
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); // dst += elMatVec_4 at the index expression used for src_dof_4
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); // dst += elMatVec_5 at the index expression used for src_dof_5
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // 4 quadrature points; each pass adds one point's contribution to the 6x6 accumulators q_acc_i_j
+                {
+                   const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q]; // 4x, with x = _data_q_p_0[q]
+                   const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q]; // 4y, with y = _data_q_p_1[q]
+                   const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0; // 4x + 4y - 3: derivative of tmp_qloop_19 w.r.t. x and also w.r.t. y
+                   const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q]; // 4xy
+                   const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_12 = tmp_qloop_11*2.0;
+                   const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q]; // x*(2x - 1)
+                   const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_15 = tmp_qloop_14*2.0;
+                   const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q]; // y*(2y - 1)
+                   const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8; // 4y*(1 - x - y)
+                   const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7; // 4x*(1 - x - y)
+                   const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; // (1 - x - y)*(1 - 2x - 2y)
+                   const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0; // weighted sum of the six wx dofs using the polynomial weights above
+                   const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0; // same weighted sum for the six wy dofs
+                   const real_t tmp_qloop_22 = abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q]; // abs_det_jac_affine_GRAY * quadrature weight * weighted sum of the diffusivity_times_delta dofs
+                   const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_9 + jac_affine_inv_1_0_GRAY*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_9)); // row-0 factor, built from the derivatives of tmp_qloop_19
+                   const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0; // 4x - 1: derivative of tmp_qloop_13 w.r.t. x
+                   const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_GRAY*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_GRAY*tmp_qloop_21*tmp_qloop_32); // row-1 factor
+                   const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0; // 4y - 1: derivative of tmp_qloop_16 w.r.t. y
+                   const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_GRAY*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_GRAY*tmp_qloop_21*tmp_qloop_34); // row-2 factor
+                   const real_t tmp_qloop_36 = jac_affine_inv_1_0_GRAY*tmp_qloop_7;
+                   const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*tmp_qloop_8;
+                   const real_t tmp_qloop_38 = jac_affine_inv_1_1_GRAY*tmp_qloop_7;
+                   const real_t tmp_qloop_39 = jac_affine_inv_0_1_GRAY*tmp_qloop_8;
+                   const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39)); // row-3 factor, built from the derivatives of tmp_qloop_10 (4y w.r.t. x, 4x w.r.t. y)
+                   const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0; // 4 - 4x - 8y: derivative of tmp_qloop_17 w.r.t. y
+                   const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_GRAY*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_GRAY*tmp_qloop_41 - tmp_qloop_39)); // row-4 factor
+                   const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0; // 4 - 8x - 4y: derivative of tmp_qloop_18 w.r.t. x
+                   const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_43 - tmp_qloop_38)); // row-5 factor
+                   const real_t q_tmp_0_0 = tmp_qloop_23*tmp_qloop_6; // q_tmp_i_j = (row factor i) * (column factor j); column factors tmp_qloop_6/24/25/28/30/31 are not computed inside this loop
+                   const real_t q_tmp_0_1 = tmp_qloop_23*tmp_qloop_24;
+                   const real_t q_tmp_0_2 = tmp_qloop_23*tmp_qloop_25;
+                   const real_t q_tmp_0_3 = tmp_qloop_23*tmp_qloop_28;
+                   const real_t q_tmp_0_4 = tmp_qloop_23*tmp_qloop_30;
+                   const real_t q_tmp_0_5 = tmp_qloop_23*tmp_qloop_31;
+                   const real_t q_tmp_1_0 = tmp_qloop_33*tmp_qloop_6;
+                   const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_33;
+                   const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_33;
+                   const real_t q_tmp_1_3 = tmp_qloop_28*tmp_qloop_33;
+                   const real_t q_tmp_1_4 = tmp_qloop_30*tmp_qloop_33;
+                   const real_t q_tmp_1_5 = tmp_qloop_31*tmp_qloop_33;
+                   const real_t q_tmp_2_0 = tmp_qloop_35*tmp_qloop_6;
+                   const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_35;
+                   const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_35;
+                   const real_t q_tmp_2_3 = tmp_qloop_28*tmp_qloop_35;
+                   const real_t q_tmp_2_4 = tmp_qloop_30*tmp_qloop_35;
+                   const real_t q_tmp_2_5 = tmp_qloop_31*tmp_qloop_35;
+                   const real_t q_tmp_3_0 = tmp_qloop_40*tmp_qloop_6;
+                   const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_40;
+                   const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_40;
+                   const real_t q_tmp_3_3 = tmp_qloop_28*tmp_qloop_40;
+                   const real_t q_tmp_3_4 = tmp_qloop_30*tmp_qloop_40;
+                   const real_t q_tmp_3_5 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t q_tmp_4_0 = tmp_qloop_42*tmp_qloop_6;
+                   const real_t q_tmp_4_1 = tmp_qloop_24*tmp_qloop_42;
+                   const real_t q_tmp_4_2 = tmp_qloop_25*tmp_qloop_42;
+                   const real_t q_tmp_4_3 = tmp_qloop_28*tmp_qloop_42;
+                   const real_t q_tmp_4_4 = tmp_qloop_30*tmp_qloop_42;
+                   const real_t q_tmp_4_5 = tmp_qloop_31*tmp_qloop_42;
+                   const real_t q_tmp_5_0 = tmp_qloop_44*tmp_qloop_6;
+                   const real_t q_tmp_5_1 = tmp_qloop_24*tmp_qloop_44;
+                   const real_t q_tmp_5_2 = tmp_qloop_25*tmp_qloop_44;
+                   const real_t q_tmp_5_3 = tmp_qloop_28*tmp_qloop_44;
+                   const real_t q_tmp_5_4 = tmp_qloop_30*tmp_qloop_44;
+                   const real_t q_tmp_5_5 = tmp_qloop_31*tmp_qloop_44;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // add this quadrature point's entries to the running sums
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // reciprocal of micro_edges_per_macro_edge_float; scales the macro-vertex differences below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // p_affine_const_<k>_<c>_BLUE: component c of point k, for three points built from the macro vertex coordinates
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 matrix jac_affine_*_BLUE: column 0 is (point 1 - point 0), column 1 is (point 2 - point 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of jac_affine_*_BLUE
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal of the determinant; no guard against a zero determinant here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse of the 2x2 matrix: adjugate entries scaled by 1/det
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // absolute value of the determinant
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; // tmp_moved_constant_*: combinations of the inverse-matrix entries only, computed once here ahead of the BLUE loops
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0;
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5;
+       const real_t tmp_moved_constant_7 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_8 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_11 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_9 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_10;
+       const real_t tmp_moved_constant_12 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_3;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_9) + jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_10 - tmp_moved_constant_3) - tmp_moved_constant_12;
+       const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_12;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1) // Four passes; q indexes _data_q_p_0, _data_q_p_1 and _data_q_w (presumably quadrature points/weights -- defined outside this loop, confirm there).
+                {
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); // 4 * _data_q_p_0[q], replicated into all four lanes.
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); // 4 * _data_q_p_1[q], replicated into all four lanes.
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_7),tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_11 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_12 = _mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_12);
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_15);
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7);
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_10),tmp_qloop_12),tmp_qloop_15);
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wx_dof_3),_mm256_mul_pd(tmp_qloop_13,wx_dof_1)),_mm256_mul_pd(tmp_qloop_16,wx_dof_2)),_mm256_mul_pd(tmp_qloop_17,wx_dof_4)),_mm256_mul_pd(tmp_qloop_18,wx_dof_5)),_mm256_mul_pd(tmp_qloop_19,wx_dof_0)); // Sum of wx_dof_0..5, each scaled by tmp_qloop_19/13/16/10/17/18 respectively (presumably wx evaluated at point q -- confirm).
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wy_dof_3),_mm256_mul_pd(tmp_qloop_13,wy_dof_1)),_mm256_mul_pd(tmp_qloop_16,wy_dof_2)),_mm256_mul_pd(tmp_qloop_17,wy_dof_4)),_mm256_mul_pd(tmp_qloop_18,wy_dof_5)),_mm256_mul_pd(tmp_qloop_19,wy_dof_0)); // Same weighted sum, applied to wy_dof_0..5.
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_19),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_13)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_16)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_10)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_17)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_18)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); // Same weighted sum over diffusivity_times_delta_dof_0..5, then multiplied by _data_q_w[q] and abs_det_jac_affine_BLUE.
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))));
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_32),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_32),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))));
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_34),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_34),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(tmp_qloop_36,tmp_qloop_37)),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(tmp_qloop_38,tmp_qloop_39))));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)); // q_tmp_i_j = row factor (tmp_qloop_23/33/35/40/42/44 for i = 0..5) * column constant (tmp_moved_constant_6/7/8/11/13/14 for j = 0..5).
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); // Add this pass's 6x6 q_tmp_i_j values onto the running q_acc_i_j sums.
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0;
+                   const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_12 = tmp_qloop_11*2.0;
+                   const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q];
+                   const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_15 = tmp_qloop_14*2.0;
+                   const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q];
+                   const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8;
+                   const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7;
+                   const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0;
+                   const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0;
+                   const real_t tmp_qloop_22 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q];
+                   const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_9 + jac_affine_inv_1_0_BLUE*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_9));
+                   const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_BLUE*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_BLUE*tmp_qloop_21*tmp_qloop_32);
+                   const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0;
+                   const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_BLUE*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_BLUE*tmp_qloop_21*tmp_qloop_34);
+                   const real_t tmp_qloop_36 = jac_affine_inv_1_0_BLUE*tmp_qloop_7;
+                   const real_t tmp_qloop_37 = jac_affine_inv_0_0_BLUE*tmp_qloop_8;
+                   const real_t tmp_qloop_38 = jac_affine_inv_1_1_BLUE*tmp_qloop_7;
+                   const real_t tmp_qloop_39 = jac_affine_inv_0_1_BLUE*tmp_qloop_8;
+                   const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39));
+                   const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_BLUE*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_BLUE*tmp_qloop_41 - tmp_qloop_39));
+                   const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_43 - tmp_qloop_38));
+                   const real_t q_tmp_0_0 = tmp_moved_constant_6*tmp_qloop_23;
+                   const real_t q_tmp_0_1 = tmp_moved_constant_7*tmp_qloop_23;
+                   const real_t q_tmp_0_2 = tmp_moved_constant_8*tmp_qloop_23;
+                   const real_t q_tmp_0_3 = tmp_moved_constant_11*tmp_qloop_23;
+                   const real_t q_tmp_0_4 = tmp_moved_constant_13*tmp_qloop_23;
+                   const real_t q_tmp_0_5 = tmp_moved_constant_14*tmp_qloop_23;
+                   const real_t q_tmp_1_0 = tmp_moved_constant_6*tmp_qloop_33;
+                   const real_t q_tmp_1_1 = tmp_moved_constant_7*tmp_qloop_33;
+                   const real_t q_tmp_1_2 = tmp_moved_constant_8*tmp_qloop_33;
+                   const real_t q_tmp_1_3 = tmp_moved_constant_11*tmp_qloop_33;
+                   const real_t q_tmp_1_4 = tmp_moved_constant_13*tmp_qloop_33;
+                   const real_t q_tmp_1_5 = tmp_moved_constant_14*tmp_qloop_33;
+                   const real_t q_tmp_2_0 = tmp_moved_constant_6*tmp_qloop_35;
+                   const real_t q_tmp_2_1 = tmp_moved_constant_7*tmp_qloop_35;
+                   const real_t q_tmp_2_2 = tmp_moved_constant_8*tmp_qloop_35;
+                   const real_t q_tmp_2_3 = tmp_moved_constant_11*tmp_qloop_35;
+                   const real_t q_tmp_2_4 = tmp_moved_constant_13*tmp_qloop_35;
+                   const real_t q_tmp_2_5 = tmp_moved_constant_14*tmp_qloop_35;
+                   const real_t q_tmp_3_0 = tmp_moved_constant_6*tmp_qloop_40;
+                   const real_t q_tmp_3_1 = tmp_moved_constant_7*tmp_qloop_40;
+                   const real_t q_tmp_3_2 = tmp_moved_constant_8*tmp_qloop_40;
+                   const real_t q_tmp_3_3 = tmp_moved_constant_11*tmp_qloop_40;
+                   const real_t q_tmp_3_4 = tmp_moved_constant_13*tmp_qloop_40;
+                   const real_t q_tmp_3_5 = tmp_moved_constant_14*tmp_qloop_40;
+                   const real_t q_tmp_4_0 = tmp_moved_constant_6*tmp_qloop_42;
+                   const real_t q_tmp_4_1 = tmp_moved_constant_7*tmp_qloop_42;
+                   const real_t q_tmp_4_2 = tmp_moved_constant_8*tmp_qloop_42;
+                   const real_t q_tmp_4_3 = tmp_moved_constant_11*tmp_qloop_42;
+                   const real_t q_tmp_4_4 = tmp_moved_constant_13*tmp_qloop_42;
+                   const real_t q_tmp_4_5 = tmp_moved_constant_14*tmp_qloop_42;
+                   const real_t q_tmp_5_0 = tmp_moved_constant_6*tmp_qloop_44;
+                   const real_t q_tmp_5_1 = tmp_moved_constant_7*tmp_qloop_44;
+                   const real_t q_tmp_5_2 = tmp_moved_constant_8*tmp_qloop_44;
+                   const real_t q_tmp_5_3 = tmp_moved_constant_11*tmp_qloop_44;
+                   const real_t q_tmp_5_4 = tmp_moved_constant_13*tmp_qloop_44;
+                   const real_t q_tmp_5_5 = tmp_moved_constant_14*tmp_qloop_44;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8487d9b0ad3dd713264c18c9f12521aa93a867ee
--- /dev/null
+++ b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp
@@ -0,0 +1,534 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusion.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_15 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_16 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_17 = tmp_qloop_15 + tmp_qloop_16;
+       const real_t tmp_qloop_18 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_19 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_20 = tmp_qloop_18 + tmp_qloop_19;
+       const real_t tmp_qloop_28 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_29 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_31 = jac_affine_inv_1_0_GRAY*tmp_qloop_15 + jac_affine_inv_1_1_GRAY*tmp_qloop_18;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1);
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8);
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wx_dof_4),_mm256_mul_pd(tmp_qloop_11,wx_dof_5)),_mm256_mul_pd(tmp_qloop_12,wx_dof_0)),_mm256_mul_pd(tmp_qloop_3,wx_dof_3)),_mm256_mul_pd(tmp_qloop_6,wx_dof_1)),_mm256_mul_pd(tmp_qloop_9,wx_dof_2));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wy_dof_4),_mm256_mul_pd(tmp_qloop_11,wy_dof_5)),_mm256_mul_pd(tmp_qloop_12,wy_dof_0)),_mm256_mul_pd(tmp_qloop_3,wy_dof_3)),_mm256_mul_pd(tmp_qloop_6,wy_dof_1)),_mm256_mul_pd(tmp_qloop_9,wy_dof_2));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                   const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20))),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))),_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_22),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_22),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))),_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_23),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_23),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_26,tmp_qloop_27)))),_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_qloop_29,tmp_qloop_29,tmp_qloop_29,tmp_qloop_29))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15)),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28))),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_18,tmp_qloop_18,tmp_qloop_18,tmp_qloop_18)),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_29,tmp_qloop_29,tmp_qloop_29,tmp_qloop_29))),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))))),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_19,tmp_qloop_19,tmp_qloop_19,tmp_qloop_19)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // Scalar pass, one ctr_0 per iteration: starts at (micro_edges_per_macro_edge - ctr_1) truncated to a multiple of 4 and runs up to (micro_edges_per_macro_edge - ctr_1); presumably the remainder of the 4-wide __m256d loop above (its header is outside this view - TODO confirm).
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0; // Same value as ctr_0, so every [ctr_0 - phantom_ctr_0] index below is 0.
+                real_t _data_float_loop_ctr_array_dim_0[4]; // Holds ctr_0 + 0..3 converted to real_t; only element 0 is read in this loop.
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4]; // Holds ctr_1 converted to real_t in all four slots; only element 0 is read in this loop.
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // p_affine_i_j = macro vertex 0 (component j) + (vertex1 - vertex0)*a/micro_edges_per_macro_edge_float + (vertex2 - vertex0)*b/micro_edges_per_macro_edge_float, with (a, b) = (ctr_0, ctr_1) for i=0, (ctr_0 + 1, ctr_1) for i=1, (ctr_0, ctr_1 + 1) for i=2. None of the six p_affine values is referenced again inside this loop body.
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // dof_0..2 come from the Vertex array at (ctr_0, ctr_1), (ctr_0 + 1, ctr_1), (ctr_0, ctr_1 + 1); the row term r*(N + 2) - r*(r + 1)/2 equals the sum of row lengths N+1, N, ... for the r preceding rows (N = micro_edges_per_macro_edge).
+                const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; // dof_3, dof_4, dof_5 come from the Edge array at the same in-row position, shifted by 1x, 2x and 0x N*(N + 1)/2 respectively (presumably one sub-array per edge orientation - TODO confirm).
+                const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // wx_dof_0..5 use exactly the same six indices as diffusivity_times_delta_dof_0..5, on _data_wxVertex / _data_wxEdge.
+                const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // wy_dof_0..5: same six indices again, on _data_wyVertex / _data_wyEdge.
+                const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // One accumulator per diagonal entry (0..5); each is summed over the q loop below and copied to elMatDiag_k afterwards.
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1) // Four iterations; reads _data_q_p_0[q], _data_q_p_1[q] and _data_q_w[q]. Below, x = _data_q_p_0[q] and y = _data_q_p_1[q].
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; // 4x
+                   const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; // 4y
+                   const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; // 4x + 4y - 3
+                   const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; // 4xy (weight applied to dof_3)
+                   const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; // 2x^2 - x (weight applied to dof_1)
+                   const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; // 2y^2 - y (weight applied to dof_2)
+                   const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; // 4y - 4xy - 4y^2 (weight applied to dof_4)
+                   const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; // 4x - 4xy - 4x^2 (weight applied to dof_5)
+                   const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; // 1 - 3x - 3y + 2x^2 + 4xy + 2y^2 (weight applied to dof_0)
+                   const real_t tmp_qloop_13 = tmp_qloop_10*wx_dof_4 + tmp_qloop_11*wx_dof_5 + tmp_qloop_12*wx_dof_0 + tmp_qloop_3*wx_dof_3 + tmp_qloop_6*wx_dof_1 + tmp_qloop_9*wx_dof_2; // Weighted sum of the six wx dofs at (x, y).
+                   const real_t tmp_qloop_14 = tmp_qloop_10*wy_dof_4 + tmp_qloop_11*wy_dof_5 + tmp_qloop_12*wy_dof_0 + tmp_qloop_3*wy_dof_3 + tmp_qloop_6*wy_dof_1 + tmp_qloop_9*wy_dof_2; // Weighted sum of the six wy dofs at (x, y).
+                   const real_t tmp_qloop_21 = abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_12 + diffusivity_times_delta_dof_1*tmp_qloop_6 + diffusivity_times_delta_dof_2*tmp_qloop_9 + diffusivity_times_delta_dof_3*tmp_qloop_3 + diffusivity_times_delta_dof_4*tmp_qloop_10 + diffusivity_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; // Same weighted sum for diffusivity_times_delta, scaled by abs_det_jac_affine_GRAY and _data_q_w[q]; common factor of every q_tmp below.
+                   const real_t tmp_qloop_22 = tmp_qloop_0 - 1.0;
+                   const real_t tmp_qloop_23 = tmp_qloop_1 - 1.0;
+                   const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                   const real_t tmp_qloop_26 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                   const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                   const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_32 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))*(jac_affine_inv_0_0_GRAY*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_20 + jac_affine_inv_1_0_GRAY*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_20); // tmp_qloop_15..20, 28, 29 and 31 used in the q_tmp lines are not defined in this loop; they come from an enclosing scope outside this view.
+                   const real_t q_tmp_1_1 = tmp_qloop_21*((jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0)*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_22 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_22);
+                   const real_t q_tmp_2_2 = tmp_qloop_21*((jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0)*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_23 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_23);
+                   const real_t q_tmp_3_3 = tmp_qloop_21*(jac_affine_inv_0_0_GRAY*tmp_qloop_28 + jac_affine_inv_0_1_GRAY*tmp_qloop_29)*(tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27));
+                   const real_t q_tmp_4_4 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27))*(jac_affine_inv_1_0_GRAY*(-tmp_qloop_15 - tmp_qloop_28) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_18 - tmp_qloop_29) - tmp_qloop_31);
+                   const real_t q_tmp_5_5 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_32 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_32 - tmp_qloop_26))*(jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_16) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_19) - tmp_qloop_31);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // Accumulate this q iteration's contribution into each of the six sums.
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0; // elMatDiag_k is the finished sum for entry k.
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // Each elMatDiag_k is added onto the existing _data_invDiag_ entry at the same index dof_k was read from; this loop only accumulates and performs no inversion.
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // reciprocal of micro_edges_per_macro_edge_float; scales the macro-vertex differences below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // v0 + scale*(v1 - v0), comp0
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // v0 + scale*(v1 - v0), comp1
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // scale*(v2 - v0), comp0
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // scale*(v2 - v0), comp1
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = v0 + scale*(v1 - v0); presumably a vertex of the first BLUE micro-triangle - TODO confirm
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = v0 + scale*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = v0 + scale*(v1 - v0) + scale*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 matrix whose columns are (point 1 - point 0) and (point 2 - point 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of jac_affine_*_BLUE
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // NOTE(review): no guard against a zero determinant is visible here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse of the 2x2 matrix: swapped diagonal / negated off-diagonal, divided by the determinant
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // absolute determinant; multiplied with the quadrature weight inside the BLUE quadrature loop
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; // tmp_moved_constant_*: products of inverse-matrix entries computed once here and read inside the BLUE quadrature loop
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0;
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_3;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1);
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8);
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wx_dof_4),_mm256_mul_pd(tmp_qloop_11,wx_dof_5)),_mm256_mul_pd(tmp_qloop_12,wx_dof_0)),_mm256_mul_pd(tmp_qloop_3,wx_dof_3)),_mm256_mul_pd(tmp_qloop_6,wx_dof_1)),_mm256_mul_pd(tmp_qloop_9,wx_dof_2));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wy_dof_4),_mm256_mul_pd(tmp_qloop_11,wy_dof_5)),_mm256_mul_pd(tmp_qloop_12,wy_dof_0)),_mm256_mul_pd(tmp_qloop_3,wy_dof_3)),_mm256_mul_pd(tmp_qloop_6,wy_dof_1)),_mm256_mul_pd(tmp_qloop_9,wy_dof_2));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                   const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5))),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2))),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5))));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))),_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_22),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_22),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))),_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_23),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_23),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_26,tmp_qloop_27)))),_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7))));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0)),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6))),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7))),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8))));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))))),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8))));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                   const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                   const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                   const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                   const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_13 = tmp_qloop_10*wx_dof_4 + tmp_qloop_11*wx_dof_5 + tmp_qloop_12*wx_dof_0 + tmp_qloop_3*wx_dof_3 + tmp_qloop_6*wx_dof_1 + tmp_qloop_9*wx_dof_2;
+                   const real_t tmp_qloop_14 = tmp_qloop_10*wy_dof_4 + tmp_qloop_11*wy_dof_5 + tmp_qloop_12*wy_dof_0 + tmp_qloop_3*wy_dof_3 + tmp_qloop_6*wy_dof_1 + tmp_qloop_9*wy_dof_2;
+                   const real_t tmp_qloop_21 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_12 + diffusivity_times_delta_dof_1*tmp_qloop_6 + diffusivity_times_delta_dof_2*tmp_qloop_9 + diffusivity_times_delta_dof_3*tmp_qloop_3 + diffusivity_times_delta_dof_4*tmp_qloop_10 + diffusivity_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];
+                   const real_t tmp_qloop_22 = tmp_qloop_0 - 1.0;
+                   const real_t tmp_qloop_23 = tmp_qloop_1 - 1.0;
+                   const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                   const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                   const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                   const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                   const real_t tmp_qloop_32 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2))*(jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5);
+                   const real_t q_tmp_1_1 = tmp_qloop_21*((jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0)*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_22 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_22);
+                   const real_t q_tmp_2_2 = tmp_qloop_21*((jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0)*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_23 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_23);
+                   const real_t q_tmp_3_3 = tmp_qloop_21*(jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_7)*(tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27));
+                   const real_t q_tmp_4_4 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27))*(jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_6) + jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_3 - tmp_moved_constant_7) - tmp_moved_constant_8);
+                   const real_t q_tmp_5_5 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_32 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_32 - tmp_qloop_26))*(jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_8);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e916dd3df26a9612c30ab78cea0fae817c0a5c03
--- /dev/null
+++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,767 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusionAnnulusMap::apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_38 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38;
+       const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+       const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_42 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_43 = tmp_qloop_41 + tmp_qloop_42;
+       const real_t tmp_qloop_44 = jac_affine_inv_0_0_GRAY*tmp_qloop_43 + jac_affine_inv_1_0_GRAY*tmp_qloop_43;
+       const real_t tmp_qloop_45 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+       const real_t tmp_qloop_46 = jac_affine_inv_0_1_GRAY*tmp_qloop_43 + jac_affine_inv_1_1_GRAY*tmp_qloop_43;
+       const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_37;
+       const real_t tmp_qloop_84 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0;
+       const real_t tmp_qloop_85 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0;
+       const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_38;
+       const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0;
+       const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0;
+       const real_t tmp_qloop_97 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_98 = jac_affine_inv_0_0_GRAY*tmp_qloop_97;
+       const real_t tmp_qloop_99 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+       const real_t tmp_qloop_100 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+       const real_t tmp_qloop_101 = tmp_qloop_100 + tmp_qloop_99;
+       const real_t tmp_qloop_102 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_103 = jac_affine_inv_0_1_GRAY*tmp_qloop_102;
+       const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+       const real_t tmp_qloop_112 = -tmp_qloop_37 - tmp_qloop_97;
+       const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_111;
+       const real_t tmp_qloop_114 = -tmp_qloop_102 - tmp_qloop_41;
+       const real_t tmp_qloop_115 = jac_affine_inv_1_0_GRAY*tmp_qloop_114 - tmp_qloop_99;
+       const real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_100;
+       const real_t tmp_qloop_117 = jac_affine_inv_1_1_GRAY*tmp_qloop_41;
+       const real_t tmp_qloop_118 = jac_affine_inv_1_1_GRAY*tmp_qloop_114 - tmp_qloop_117;
+       const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_38;
+       const real_t tmp_qloop_124 = jac_affine_inv_0_0_GRAY*tmp_qloop_123 - tmp_qloop_111;
+       const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_42;
+       const real_t tmp_qloop_126 = jac_affine_inv_0_0_GRAY*tmp_qloop_125 - tmp_qloop_100;
+       const real_t tmp_qloop_127 = jac_affine_inv_0_1_GRAY*tmp_qloop_123 - tmp_qloop_99;
+       const real_t tmp_qloop_128 = jac_affine_inv_0_1_GRAY*tmp_qloop_125 - tmp_qloop_117;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0; /* q_acc_i_j: running quadrature sum for entry (i, j) of the 6x6 local matrix; all 36 sums start at zero */
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1) /* one pass per quadrature point: coordinates in _data_q_p_0[q], _data_q_p_1[q], weight in _data_q_w[q] */
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; /* component 0 of the quadrature point mapped affinely onto the triangle p_affine_0, p_affine_1, p_affine_2 */
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; /* component 1 of the same mapped point */
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; /* squared Euclidean norm of the mapped point */
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); /* reciprocal of the norm; not guarded against a zero norm */
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; /* tmp_qloop_12, like tmp_qloop_1 and tmp_qloop_7..11 used below, is declared before the visible chunk */
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); /* NOTE(review): radRayVertex and rayVertex_0, rayVertex_1 look like annulus-map geometry parameters - confirm against the enclosing function */
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0; /* 4*xi + 4*eta - 3 with (xi, eta) = (_data_q_p_0[q], _data_q_p_1[q]); equals both partial derivatives of tmp_qloop_78 */
+                const real_t tmp_qloop_50 = jac_affine_inv_0_0_GRAY*tmp_qloop_49 + jac_affine_inv_1_0_GRAY*tmp_qloop_49; /* tmp_qloop_49 combined with column 0 of the GRAY inverse affine Jacobian */
+                const real_t tmp_qloop_54 = jac_affine_inv_0_1_GRAY*tmp_qloop_49 + jac_affine_inv_1_1_GRAY*tmp_qloop_49; /* same with column 1 */
+                const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q]; /* 4*xi*eta */
+                const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_71 = tmp_qloop_70*2.0;
+                const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q]; /* 2*xi^2 - xi */
+                const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_74 = tmp_qloop_73*2.0;
+                const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q]; /* 2*eta^2 - eta */
+                const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0; /* 4*eta - 4*xi*eta - 4*eta^2 */
+                const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0; /* 4*xi - 4*xi*eta - 4*xi^2 */
+                const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; /* 2*xi^2 + 2*eta^2 + 4*xi*eta - 3*xi - 3*eta + 1; tmp_qloop_78, 72, 75, 69, 76, 77 together are the six quadratic Lagrange polynomials on the reference triangle (they sum to 1) */
+                const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0; /* wx interpolated at the quadrature point from its six local coefficients */
+                const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0; /* wy interpolated the same way */
+                const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0; /* 4*xi - 1, the xi-derivative of tmp_qloop_72 */
+                const real_t tmp_qloop_87 = jac_affine_inv_0_0_GRAY*tmp_qloop_86;
+                const real_t tmp_qloop_88 = jac_affine_inv_0_1_GRAY*tmp_qloop_86;
+                const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0; /* 4*eta - 1, the eta-derivative of tmp_qloop_75 */
+                const real_t tmp_qloop_94 = jac_affine_inv_1_0_GRAY*tmp_qloop_93;
+                const real_t tmp_qloop_95 = jac_affine_inv_1_1_GRAY*tmp_qloop_93;
+                const real_t tmp_qloop_104 = jac_affine_inv_1_0_GRAY*tmp_qloop_47;
+                const real_t tmp_qloop_105 = jac_affine_inv_0_0_GRAY*tmp_qloop_48;
+                const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105;
+                const real_t tmp_qloop_107 = jac_affine_inv_1_1_GRAY*tmp_qloop_47;
+                const real_t tmp_qloop_108 = jac_affine_inv_0_1_GRAY*tmp_qloop_48;
+                const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108;
+                const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0; /* 4 - 4*xi - 8*eta, the eta-derivative of tmp_qloop_76 */
+                const real_t tmp_qloop_120 = jac_affine_inv_1_0_GRAY*tmp_qloop_119 - tmp_qloop_105;
+                const real_t tmp_qloop_121 = jac_affine_inv_1_1_GRAY*tmp_qloop_119 - tmp_qloop_108;
+                const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0; /* 4 - 8*xi - 4*eta, the xi-derivative of tmp_qloop_77 */
+                const real_t tmp_qloop_130 = jac_affine_inv_0_0_GRAY*tmp_qloop_129 - tmp_qloop_104;
+                const real_t tmp_qloop_131 = jac_affine_inv_0_1_GRAY*tmp_qloop_129 - tmp_qloop_107;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; /* jac_blending_i_j: 2x2 matrix evaluated at the mapped point; presumably the Jacobian of the blending map (by name) - TODO confirm */
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; /* determinant of jac_blending */
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); /* reciprocal determinant; no guard against a zero determinant */
+                const real_t abs_det_jac_blending = tmp_qloop_21; /* NOTE(review): named abs_det_* but assigned the signed determinant without abs() - confirm the determinant is positive wherever this kernel is used */
+                const real_t tmp_qloop_81 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q]; /* quadrature weight times both determinant factors times diffusivity_times_delta interpolated at the quadrature point */
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; /* inverse of jac_blending via the 2x2 adjugate formula */
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54)); /* tmp_qloop_82 and tmp_qloop_133..137: tmp_qloop_81 times (tmp_qloop_79, tmp_qloop_80) contracted with the transformed first-derivative terms of one basis polynomial; these are the row factors of q_tmp below */
+                const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88));
+                const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95));
+                const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109));
+                const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121));
+                const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131));
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; /* hessian_blending_i_j_k: presumably second derivatives of the blending map (by name) - TODO confirm index convention */
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52;
+                const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52;
+                const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61;
+                const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57;
+                const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57;
+                const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_44) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_44) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_45 + jac_blending_inv_1_0*tmp_qloop_46) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_45 + jac_blending_inv_1_1*tmp_qloop_46) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67); /* tmp_qloop_68, 89, 96, 110, 122, 132: column factors of q_tmp below, one per basis polynomial; inputs such as tmp_qloop_40 and tmp_qloop_44..46 are declared before the visible chunk; presumably a second-derivative (Laplacian-type) term - TODO confirm */
+                const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_83) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_83) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_83 + jac_blending_inv_1_0*tmp_qloop_85) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_83 + jac_blending_inv_1_1*tmp_qloop_85) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88);
+                const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95);
+                const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_98 + jac_blending_inv_1_0*tmp_qloop_101) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_98 + jac_blending_inv_1_1*tmp_qloop_101) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_101 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_101 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67);
+                const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_113 + jac_blending_inv_1_0*tmp_qloop_115) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_113 + jac_blending_inv_1_1*tmp_qloop_115) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_116 + jac_blending_inv_1_0*tmp_qloop_118) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_116 + jac_blending_inv_1_1*tmp_qloop_118) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67);
+                const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_126) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_126) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_127 + jac_blending_inv_1_0*tmp_qloop_128) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_127 + jac_blending_inv_1_1*tmp_qloop_128) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67);
+                const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82; /* q_tmp_i_j = (row factor i) * (column factor j): this quadrature point's contribution to local matrix entry (i, j) */
+                const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89;
+                const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96;
+                const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82;
+                const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82;
+                const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82;
+                const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68;
+                const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89;
+                const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96;
+                const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133;
+                const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133;
+                const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133;
+                const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68;
+                const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89;
+                const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96;
+                const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134;
+                const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134;
+                const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134;
+                const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68;
+                const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89;
+                const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96;
+                const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135;
+                const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135;
+                const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135;
+                const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68;
+                const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89;
+                const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96;
+                const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136;
+                const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136;
+                const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136;
+                const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68;
+                const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89;
+                const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96;
+                const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137;
+                const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137;
+                const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; /* add this point's contributions to the running sums */
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; /* elMatVec_i = sum over j of q_acc_i_j*src_dof_j: row i of the accumulated 6x6 local matrix applied to the six source values (src_dof_j are declared before the visible chunk) */
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; /* results 0..2 are added (read-modify-write) into _data_dstVertex at the same three indices used for the wx/wy Vertex coefficient loads of this element */
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; /* results 3..5 are added into _data_dstEdge; entries 3 and 4 sit one and two blocks of micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)/2 past entry 5 */
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; /* 1 / micro_edges_per_macro_edge_float: scale applied to the macro-vertex difference vectors below */
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); /* macro vertex 0 shifted by one scaled step towards macro vertex 1, component 0 */
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); /* same, component 1 */
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); /* one scaled step from macro vertex 0 towards macro vertex 2, component 0 */
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); /* same, component 1 */
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; /* p_affine_const_k_c_BLUE: component c of vertex k of one BLUE-type triangle, used only to derive the constant Jacobian below */
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; /* Jacobian columns are the edge vectors (vertex 1 - vertex 0) and (vertex 2 - vertex 0) */
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; /* determinant of the 2x2 affine Jacobian */
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); /* reciprocal determinant; no guard against a zero determinant */
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; /* inverse Jacobian via the 2x2 adjugate formula */
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); /* NOTE(review): unqualified abs on a real_t; which overload is chosen depends on headers outside this chunk - confirm a floating-point overload is selected */
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; /* tmp_moved_constant_0..36: products and sums of inverse-Jacobian entries that depend on neither loop counter; presumably hoisted by the generator's "moveconstants" optimisation - TODO confirm */
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0;
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0;
+       const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0;
+       const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16;
+       const real_t tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19;
+       const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21;
+       const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16;
+       const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_26 = -tmp_moved_constant_21 - tmp_moved_constant_4;
+       const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_28 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_29 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_4;
+       const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29;
+       const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1;
+       const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5;
+       const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0;
+                const real_t tmp_qloop_50 = jac_affine_inv_0_0_BLUE*tmp_qloop_49 + jac_affine_inv_1_0_BLUE*tmp_qloop_49;
+                const real_t tmp_qloop_54 = jac_affine_inv_0_1_BLUE*tmp_qloop_49 + jac_affine_inv_1_1_BLUE*tmp_qloop_49;
+                const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q];
+                const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_71 = tmp_qloop_70*2.0;
+                const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q];
+                const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_74 = tmp_qloop_73*2.0;
+                const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q];
+                const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0;
+                const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0;
+                const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0;
+                const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0;
+                const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0;
+                const real_t tmp_qloop_87 = jac_affine_inv_0_0_BLUE*tmp_qloop_86;
+                const real_t tmp_qloop_88 = jac_affine_inv_0_1_BLUE*tmp_qloop_86;
+                const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0;
+                const real_t tmp_qloop_94 = jac_affine_inv_1_0_BLUE*tmp_qloop_93;
+                const real_t tmp_qloop_95 = jac_affine_inv_1_1_BLUE*tmp_qloop_93;
+                const real_t tmp_qloop_104 = jac_affine_inv_1_0_BLUE*tmp_qloop_47;
+                const real_t tmp_qloop_105 = jac_affine_inv_0_0_BLUE*tmp_qloop_48;
+                const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105;
+                const real_t tmp_qloop_107 = jac_affine_inv_1_1_BLUE*tmp_qloop_47;
+                const real_t tmp_qloop_108 = jac_affine_inv_0_1_BLUE*tmp_qloop_48;
+                const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108;
+                const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_120 = jac_affine_inv_1_0_BLUE*tmp_qloop_119 - tmp_qloop_105;
+                const real_t tmp_qloop_121 = jac_affine_inv_1_1_BLUE*tmp_qloop_119 - tmp_qloop_108;
+                const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_130 = jac_affine_inv_0_0_BLUE*tmp_qloop_129 - tmp_qloop_104;
+                const real_t tmp_qloop_131 = jac_affine_inv_0_1_BLUE*tmp_qloop_129 - tmp_qloop_107;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_81 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54));
+                const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88));
+                const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95));
+                const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109));
+                const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121));
+                const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131));
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52;
+                const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52;
+                const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61;
+                const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57;
+                const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57;
+                const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67);
+                const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88);
+                const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95);
+                const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67);
+                const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67);
+                const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67);
+                const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82;
+                const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89;
+                const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96;
+                const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82;
+                const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82;
+                const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82;
+                const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68;
+                const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89;
+                const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96;
+                const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133;
+                const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133;
+                const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133;
+                const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68;
+                const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89;
+                const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96;
+                const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134;
+                const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134;
+                const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134;
+                const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68;
+                const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89;
+                const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96;
+                const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135;
+                const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135;
+                const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135;
+                const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68;
+                const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89;
+                const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96;
+                const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136;
+                const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136;
+                const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136;
+                const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68;
+                const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89;
+                const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96;
+                const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137;
+                const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137;
+                const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..745bff4f979a187f635bd208043455623a25a13a
--- /dev/null
+++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,551 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       const real_t tmp_qloop_54 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_55 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_56 = tmp_qloop_54 + tmp_qloop_55;
+       const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56 + jac_affine_inv_1_0_GRAY*tmp_qloop_56;
+       const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_59 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_60 = tmp_qloop_58 + tmp_qloop_59;
+       const real_t tmp_qloop_61 = jac_affine_inv_0_0_GRAY*tmp_qloop_60 + jac_affine_inv_1_0_GRAY*tmp_qloop_60;
+       const real_t tmp_qloop_62 = jac_affine_inv_0_1_GRAY*tmp_qloop_56 + jac_affine_inv_1_1_GRAY*tmp_qloop_56;
+       const real_t tmp_qloop_63 = jac_affine_inv_0_1_GRAY*tmp_qloop_60 + jac_affine_inv_1_1_GRAY*tmp_qloop_60;
+       const real_t tmp_qloop_84 = jac_affine_inv_0_1_GRAY*tmp_qloop_54;
+       const real_t tmp_qloop_85 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0;
+       const real_t tmp_qloop_86 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0;
+       const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_55;
+       const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0;
+       const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0;
+       const real_t tmp_qloop_99 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_100 = jac_affine_inv_0_0_GRAY*tmp_qloop_99;
+       const real_t tmp_qloop_101 = jac_affine_inv_1_1_GRAY*tmp_qloop_54;
+       const real_t tmp_qloop_102 = jac_affine_inv_0_1_GRAY*tmp_qloop_55;
+       const real_t tmp_qloop_103 = tmp_qloop_101 + tmp_qloop_102;
+       const real_t tmp_qloop_104 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_105 = jac_affine_inv_0_1_GRAY*tmp_qloop_104;
+       const real_t tmp_qloop_109 = jac_affine_inv_1_0_GRAY*tmp_qloop_54;
+       const real_t tmp_qloop_110 = -tmp_qloop_54 - tmp_qloop_99;
+       const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_110 - tmp_qloop_109;
+       const real_t tmp_qloop_112 = -tmp_qloop_104 - tmp_qloop_58;
+       const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_101;
+       const real_t tmp_qloop_114 = jac_affine_inv_1_1_GRAY*tmp_qloop_110 - tmp_qloop_102;
+       const real_t tmp_qloop_115 = jac_affine_inv_1_1_GRAY*tmp_qloop_58;
+       const real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_115;
+       const real_t tmp_qloop_120 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_55;
+       const real_t tmp_qloop_121 = jac_affine_inv_0_0_GRAY*tmp_qloop_120 - tmp_qloop_109;
+       const real_t tmp_qloop_122 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_59;
+       const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*tmp_qloop_122 - tmp_qloop_102;
+       const real_t tmp_qloop_124 = jac_affine_inv_0_1_GRAY*tmp_qloop_120 - tmp_qloop_101;
+       const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*tmp_qloop_122 - tmp_qloop_115;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*wx_dof_3 + tmp_qloop_45*wx_dof_1 + tmp_qloop_48*wx_dof_2 + tmp_qloop_49*wx_dof_4 + tmp_qloop_50*wx_dof_5 + tmp_qloop_51*wx_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*wy_dof_3 + tmp_qloop_45*wy_dof_1 + tmp_qloop_48*wy_dof_2 + tmp_qloop_49*wy_dof_4 + tmp_qloop_50*wy_dof_5 + tmp_qloop_51*wy_dof_0;
+                const real_t tmp_qloop_81 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_82 = jac_affine_inv_0_0_GRAY*tmp_qloop_81;
+                const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_81;
+                const real_t tmp_qloop_87 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_88 = jac_affine_inv_1_0_GRAY*tmp_qloop_87;
+                const real_t tmp_qloop_89 = jac_affine_inv_1_1_GRAY*tmp_qloop_87;
+                const real_t tmp_qloop_93 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_94 = jac_affine_inv_0_0_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_95 = tmp_qloop_93 + tmp_qloop_94;
+                const real_t tmp_qloop_96 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+                const real_t tmp_qloop_97 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+                const real_t tmp_qloop_98 = tmp_qloop_96 + tmp_qloop_97;
+                const real_t tmp_qloop_106 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_107 = jac_affine_inv_1_0_GRAY*tmp_qloop_106 - tmp_qloop_94;
+                const real_t tmp_qloop_108 = jac_affine_inv_1_1_GRAY*tmp_qloop_106 - tmp_qloop_97;
+                const real_t tmp_qloop_117 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_118 = jac_affine_inv_0_0_GRAY*tmp_qloop_117 - tmp_qloop_93;
+                const real_t tmp_qloop_119 = jac_affine_inv_0_1_GRAY*tmp_qloop_117 - tmp_qloop_96;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_80 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_51 + diffusivity_times_delta_dof_1*tmp_qloop_45 + diffusivity_times_delta_dof_2*tmp_qloop_48 + diffusivity_times_delta_dof_3*tmp_qloop_42 + diffusivity_times_delta_dof_4*tmp_qloop_49 + diffusivity_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t tmp_qloop_64 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_72 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t tmp_qloop_68 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_76 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t tmp_qloop_65 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                const real_t tmp_qloop_73 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_74 = jac_blending_inv_0_0*tmp_qloop_72 + jac_blending_inv_0_1*tmp_qloop_73;
+                const real_t tmp_qloop_75 = jac_blending_inv_1_0*tmp_qloop_72 + jac_blending_inv_1_1*tmp_qloop_73;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t tmp_qloop_69 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_70 = jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_0_1*tmp_qloop_69;
+                const real_t tmp_qloop_71 = jac_blending_inv_1_0*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69;
+                const real_t tmp_qloop_77 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_78 = jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_0_1*tmp_qloop_77;
+                const real_t tmp_qloop_79 = jac_blending_inv_1_0*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77;
+                const real_t q_tmp_0_0 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_61) + jac_blending_inv_0_0*(tmp_qloop_40*tmp_qloop_66 + tmp_qloop_41*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_61) + jac_blending_inv_0_1*(tmp_qloop_40*tmp_qloop_70 + tmp_qloop_41*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + jac_blending_inv_1_0*(tmp_qloop_40*tmp_qloop_74 + tmp_qloop_41*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63) + jac_blending_inv_1_1*(tmp_qloop_40*tmp_qloop_78 + tmp_qloop_41*tmp_qloop_79));
+                const real_t q_tmp_1_1 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_82 + jac_blending_inv_1_0*tmp_qloop_83) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_82 + jac_blending_inv_1_1*tmp_qloop_83))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_85 + jac_blending_inv_1_0*tmp_qloop_84) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_82 + tmp_qloop_67*tmp_qloop_83) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_85 + jac_blending_inv_1_1*tmp_qloop_84) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_82 + tmp_qloop_71*tmp_qloop_83) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_86) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_82 + tmp_qloop_75*tmp_qloop_83) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_86) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_82 + tmp_qloop_79*tmp_qloop_83));
+                const real_t q_tmp_2_2 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_88 + jac_blending_inv_1_0*tmp_qloop_89) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_88 + jac_blending_inv_1_1*tmp_qloop_89))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_88 + tmp_qloop_67*tmp_qloop_89) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_88 + tmp_qloop_71*tmp_qloop_89) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_88 + tmp_qloop_75*tmp_qloop_89) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89));
+                const real_t q_tmp_3_3 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_95 + jac_blending_inv_1_0*tmp_qloop_98) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_95 + jac_blending_inv_1_1*tmp_qloop_98))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_100 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_95 + tmp_qloop_67*tmp_qloop_98) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_100 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71*tmp_qloop_98) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_103 + jac_blending_inv_1_0*tmp_qloop_105) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_95 + tmp_qloop_75*tmp_qloop_98) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_103 + jac_blending_inv_1_1*tmp_qloop_105) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_95 + tmp_qloop_79*tmp_qloop_98));
+                const real_t q_tmp_4_4 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_107 + jac_blending_inv_1_0*tmp_qloop_108) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_107 + jac_blending_inv_1_1*tmp_qloop_108))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_111 + jac_blending_inv_1_0*tmp_qloop_113) + jac_blending_inv_0_0*(tmp_qloop_107*tmp_qloop_66 + tmp_qloop_108*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_111 + jac_blending_inv_1_1*tmp_qloop_113) + jac_blending_inv_0_1*(tmp_qloop_107*tmp_qloop_70 + tmp_qloop_108*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_114 + jac_blending_inv_1_0*tmp_qloop_116) + jac_blending_inv_1_0*(tmp_qloop_107*tmp_qloop_74 + tmp_qloop_108*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_114 + jac_blending_inv_1_1*tmp_qloop_116) + jac_blending_inv_1_1*(tmp_qloop_107*tmp_qloop_78 + tmp_qloop_108*tmp_qloop_79));
+                const real_t q_tmp_5_5 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_118 + jac_blending_inv_1_0*tmp_qloop_119) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_118 + jac_blending_inv_1_1*tmp_qloop_119))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_121 + jac_blending_inv_1_0*tmp_qloop_123) + jac_blending_inv_0_0*(tmp_qloop_118*tmp_qloop_66 + tmp_qloop_119*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_121 + jac_blending_inv_1_1*tmp_qloop_123) + jac_blending_inv_0_1*(tmp_qloop_118*tmp_qloop_70 + tmp_qloop_119*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_125) + jac_blending_inv_1_0*(tmp_qloop_118*tmp_qloop_74 + tmp_qloop_119*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_125) + jac_blending_inv_1_1*(tmp_qloop_118*tmp_qloop_78 + tmp_qloop_119*tmp_qloop_79));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0;
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0;
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0;
+       const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0;
+       const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16;
+       const real_t tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19;
+       const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21;
+       const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16;
+       const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_26 = -tmp_moved_constant_21 - tmp_moved_constant_4;
+       const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_28 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_29 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_4;
+       const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29;
+       const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1;
+       const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5;
+       const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0;
+                const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_44 = tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q];
+                const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_47 = tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0;
+                const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0;
+                const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_42*wx_dof_3 + tmp_qloop_45*wx_dof_1 + tmp_qloop_48*wx_dof_2 + tmp_qloop_49*wx_dof_4 + tmp_qloop_50*wx_dof_5 + tmp_qloop_51*wx_dof_0;
+                const real_t tmp_qloop_53 = tmp_qloop_42*wy_dof_3 + tmp_qloop_45*wy_dof_1 + tmp_qloop_48*wy_dof_2 + tmp_qloop_49*wy_dof_4 + tmp_qloop_50*wy_dof_5 + tmp_qloop_51*wy_dof_0;
+                const real_t tmp_qloop_81 = tmp_qloop_37 - 1.0;
+                const real_t tmp_qloop_82 = jac_affine_inv_0_0_BLUE*tmp_qloop_81;
+                const real_t tmp_qloop_83 = jac_affine_inv_0_1_BLUE*tmp_qloop_81;
+                const real_t tmp_qloop_87 = tmp_qloop_38 - 1.0;
+                const real_t tmp_qloop_88 = jac_affine_inv_1_0_BLUE*tmp_qloop_87;
+                const real_t tmp_qloop_89 = jac_affine_inv_1_1_BLUE*tmp_qloop_87;
+                const real_t tmp_qloop_93 = jac_affine_inv_1_0_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_94 = jac_affine_inv_0_0_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_95 = tmp_qloop_93 + tmp_qloop_94;
+                const real_t tmp_qloop_96 = jac_affine_inv_1_1_BLUE*tmp_qloop_37;
+                const real_t tmp_qloop_97 = jac_affine_inv_0_1_BLUE*tmp_qloop_38;
+                const real_t tmp_qloop_98 = tmp_qloop_96 + tmp_qloop_97;
+                const real_t tmp_qloop_106 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_107 = jac_affine_inv_1_0_BLUE*tmp_qloop_106 - tmp_qloop_94;
+                const real_t tmp_qloop_108 = jac_affine_inv_1_1_BLUE*tmp_qloop_106 - tmp_qloop_97;
+                const real_t tmp_qloop_117 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_118 = jac_affine_inv_0_0_BLUE*tmp_qloop_117 - tmp_qloop_93;
+                const real_t tmp_qloop_119 = jac_affine_inv_0_1_BLUE*tmp_qloop_117 - tmp_qloop_96;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_80 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_51 + diffusivity_times_delta_dof_1*tmp_qloop_45 + diffusivity_times_delta_dof_2*tmp_qloop_48 + diffusivity_times_delta_dof_3*tmp_qloop_42 + diffusivity_times_delta_dof_4*tmp_qloop_49 + diffusivity_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t tmp_qloop_64 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_72 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t tmp_qloop_68 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_76 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t tmp_qloop_65 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                const real_t tmp_qloop_73 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_74 = jac_blending_inv_0_0*tmp_qloop_72 + jac_blending_inv_0_1*tmp_qloop_73;
+                const real_t tmp_qloop_75 = jac_blending_inv_1_0*tmp_qloop_72 + jac_blending_inv_1_1*tmp_qloop_73;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t tmp_qloop_69 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_70 = jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_0_1*tmp_qloop_69;
+                const real_t tmp_qloop_71 = jac_blending_inv_1_0*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69;
+                const real_t tmp_qloop_77 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_78 = jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_0_1*tmp_qloop_77;
+                const real_t tmp_qloop_79 = jac_blending_inv_1_0*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77;
+                const real_t q_tmp_0_0 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_40*tmp_qloop_66 + tmp_qloop_41*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_40*tmp_qloop_70 + tmp_qloop_41*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_40*tmp_qloop_74 + tmp_qloop_41*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + jac_blending_inv_1_1*(tmp_qloop_40*tmp_qloop_78 + tmp_qloop_41*tmp_qloop_79));
+                const real_t q_tmp_1_1 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_82 + jac_blending_inv_1_0*tmp_qloop_83) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_82 + jac_blending_inv_1_1*tmp_qloop_83))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_82 + tmp_qloop_67*tmp_qloop_83) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_82 + tmp_qloop_71*tmp_qloop_83) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_82 + tmp_qloop_75*tmp_qloop_83) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_82 + tmp_qloop_79*tmp_qloop_83));
+                const real_t q_tmp_2_2 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_88 + jac_blending_inv_1_0*tmp_qloop_89) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_88 + jac_blending_inv_1_1*tmp_qloop_89))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_88 + tmp_qloop_67*tmp_qloop_89) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_88 + tmp_qloop_71*tmp_qloop_89) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_88 + tmp_qloop_75*tmp_qloop_89) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89));
+                const real_t q_tmp_3_3 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_95 + jac_blending_inv_1_0*tmp_qloop_98) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_95 + jac_blending_inv_1_1*tmp_qloop_98))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_95 + tmp_qloop_67*tmp_qloop_98) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71*tmp_qloop_98) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_95 + tmp_qloop_75*tmp_qloop_98) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_95 + tmp_qloop_79*tmp_qloop_98));
+                const real_t q_tmp_4_4 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_107 + jac_blending_inv_1_0*tmp_qloop_108) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_107 + jac_blending_inv_1_1*tmp_qloop_108))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_107*tmp_qloop_66 + tmp_qloop_108*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_107*tmp_qloop_70 + tmp_qloop_108*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_107*tmp_qloop_74 + tmp_qloop_108*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + jac_blending_inv_1_1*(tmp_qloop_107*tmp_qloop_78 + tmp_qloop_108*tmp_qloop_79));
+                const real_t q_tmp_5_5 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_118 + jac_blending_inv_1_0*tmp_qloop_119) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_118 + jac_blending_inv_1_1*tmp_qloop_119))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_118*tmp_qloop_66 + tmp_qloop_119*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_118*tmp_qloop_70 + tmp_qloop_119*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_118*tmp_qloop_74 + tmp_qloop_119*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + jac_blending_inv_1_1*(tmp_qloop_118*tmp_qloop_78 + tmp_qloop_119*tmp_qloop_79));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..337a42b7ce3e9bf9d3044e030a3dfe3f287bc6c3
--- /dev/null
+++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,925 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusionAnnulusMap::toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_38 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38;
+       const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39;
+       const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_42 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_43 = tmp_qloop_41 + tmp_qloop_42;
+       const real_t tmp_qloop_44 = jac_affine_inv_0_0_GRAY*tmp_qloop_43 + jac_affine_inv_1_0_GRAY*tmp_qloop_43;
+       const real_t tmp_qloop_45 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39;
+       const real_t tmp_qloop_46 = jac_affine_inv_0_1_GRAY*tmp_qloop_43 + jac_affine_inv_1_1_GRAY*tmp_qloop_43;
+       const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_37;
+       const real_t tmp_qloop_84 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0;
+       const real_t tmp_qloop_85 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0;
+       const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_38;
+       const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0;
+       const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0;
+       const real_t tmp_qloop_97 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_98 = jac_affine_inv_0_0_GRAY*tmp_qloop_97;
+       const real_t tmp_qloop_99 = jac_affine_inv_1_1_GRAY*tmp_qloop_37;
+       const real_t tmp_qloop_100 = jac_affine_inv_0_1_GRAY*tmp_qloop_38;
+       const real_t tmp_qloop_101 = tmp_qloop_100 + tmp_qloop_99;
+       const real_t tmp_qloop_102 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_103 = jac_affine_inv_0_1_GRAY*tmp_qloop_102;
+       const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_37;
+       const real_t tmp_qloop_112 = -tmp_qloop_37 - tmp_qloop_97;
+       const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_111;
+       const real_t tmp_qloop_114 = -tmp_qloop_102 - tmp_qloop_41;
+       const real_t tmp_qloop_115 = jac_affine_inv_1_0_GRAY*tmp_qloop_114 - tmp_qloop_99;
+       const real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_100;
+       const real_t tmp_qloop_117 = jac_affine_inv_1_1_GRAY*tmp_qloop_41;
+       const real_t tmp_qloop_118 = jac_affine_inv_1_1_GRAY*tmp_qloop_114 - tmp_qloop_117;
+       const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_38;
+       const real_t tmp_qloop_124 = jac_affine_inv_0_0_GRAY*tmp_qloop_123 - tmp_qloop_111;
+       const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_42;
+       const real_t tmp_qloop_126 = jac_affine_inv_0_0_GRAY*tmp_qloop_125 - tmp_qloop_100;
+       const real_t tmp_qloop_127 = jac_affine_inv_0_1_GRAY*tmp_qloop_123 - tmp_qloop_99;
+       const real_t tmp_qloop_128 = jac_affine_inv_0_1_GRAY*tmp_qloop_125 - tmp_qloop_117;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0;
+                const real_t tmp_qloop_50 = jac_affine_inv_0_0_GRAY*tmp_qloop_49 + jac_affine_inv_1_0_GRAY*tmp_qloop_49;
+                const real_t tmp_qloop_54 = jac_affine_inv_0_1_GRAY*tmp_qloop_49 + jac_affine_inv_1_1_GRAY*tmp_qloop_49;
+                const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q];
+                const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_71 = tmp_qloop_70*2.0;
+                const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q];
+                const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_74 = tmp_qloop_73*2.0;
+                const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q];
+                const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0;
+                const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0;
+                const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0;
+                const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0;
+                const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0;
+                const real_t tmp_qloop_87 = jac_affine_inv_0_0_GRAY*tmp_qloop_86;
+                const real_t tmp_qloop_88 = jac_affine_inv_0_1_GRAY*tmp_qloop_86;
+                const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0;
+                const real_t tmp_qloop_94 = jac_affine_inv_1_0_GRAY*tmp_qloop_93;
+                const real_t tmp_qloop_95 = jac_affine_inv_1_1_GRAY*tmp_qloop_93;
+                const real_t tmp_qloop_104 = jac_affine_inv_1_0_GRAY*tmp_qloop_47;
+                const real_t tmp_qloop_105 = jac_affine_inv_0_0_GRAY*tmp_qloop_48;
+                const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105;
+                const real_t tmp_qloop_107 = jac_affine_inv_1_1_GRAY*tmp_qloop_47;
+                const real_t tmp_qloop_108 = jac_affine_inv_0_1_GRAY*tmp_qloop_48;
+                const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108;
+                const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_120 = jac_affine_inv_1_0_GRAY*tmp_qloop_119 - tmp_qloop_105;
+                const real_t tmp_qloop_121 = jac_affine_inv_1_1_GRAY*tmp_qloop_119 - tmp_qloop_108;
+                const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_130 = jac_affine_inv_0_0_GRAY*tmp_qloop_129 - tmp_qloop_104;
+                const real_t tmp_qloop_131 = jac_affine_inv_0_1_GRAY*tmp_qloop_129 - tmp_qloop_107;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_81 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54));
+                const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88));
+                const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95));
+                const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109));
+                const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121));
+                const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131));
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52;
+                const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52;
+                const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61;
+                const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57;
+                const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57;
+                const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_44) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_44) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_45 + jac_blending_inv_1_0*tmp_qloop_46) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_45 + jac_blending_inv_1_1*tmp_qloop_46) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67);
+                const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_83) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_83) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_83 + jac_blending_inv_1_0*tmp_qloop_85) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_83 + jac_blending_inv_1_1*tmp_qloop_85) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88);
+                const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95);
+                const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_98 + jac_blending_inv_1_0*tmp_qloop_101) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_98 + jac_blending_inv_1_1*tmp_qloop_101) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_101 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_101 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67);
+                const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_113 + jac_blending_inv_1_0*tmp_qloop_115) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_113 + jac_blending_inv_1_1*tmp_qloop_115) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_116 + jac_blending_inv_1_0*tmp_qloop_118) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_116 + jac_blending_inv_1_1*tmp_qloop_118) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67);
+                const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_126) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_126) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_127 + jac_blending_inv_1_0*tmp_qloop_128) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_127 + jac_blending_inv_1_1*tmp_qloop_128) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67);
+                const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82;
+                const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89;
+                const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96;
+                const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82;
+                const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82;
+                const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82;
+                const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68;
+                const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89;
+                const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96;
+                const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133;
+                const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133;
+                const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133;
+                const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68;
+                const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89;
+                const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96;
+                const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134;
+                const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134;
+                const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134;
+                const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68;
+                const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89;
+                const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96;
+                const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135;
+                const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135;
+                const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135;
+                const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68;
+                const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89;
+                const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96;
+                const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136;
+                const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136;
+                const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136;
+                const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68;
+                const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89;
+                const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96;
+                const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137;
+                const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137;
+                const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 ); // destination indices of the 6 local DoFs (3 read from _data_dstVertex, 3 from _data_dstEdge)
+             std::vector< uint_t > _data_colIdx( 6 ); // source indices of the 6 local DoFs (3 read from _data_srcVertex, 3 from _data_srcEdge)
+             std::vector< real_t > _data_mat( 36 ); // 6x6 local matrix, flattened so that elMat_i_j is stored at index 6*i + j
+         // NOTE(review): the three vectors above are constructed each time this scope runs; if this is the per-element loop body, hoisting them would avoid repeated allocation - confirm.
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); // column indices use the same offset expressions as the rows, read from the src arrays
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         // The stored values are cast to uint64_t before being assigned into the uint_t index vectors.
+             /* Apply basis transformation */
+         // No transformation statements are emitted under the heading above:
+         // the assignments below copy every elMat_i_j unchanged (apart from the real_t cast)
+         // into _data_mat in row-major order, i.e. rows 0..5, each with columns 0..5.
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         // Pass the 6 row indices, 6 column indices and 36 values to the matrix object.
+         // NOTE(review): addValues is declared outside this excerpt; presumably it accumulates into existing entries - confirm.
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // reciprocal of micro_edges_per_macro_edge_float; scales the macro-vertex offsets below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // comp0 of: macro vertex 0 + scaled offset towards macro vertex 1
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // comp1 of the same point
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // comp0 of the scaled offset towards macro vertex 2
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // comp1 of the scaled offset towards macro vertex 2
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0: equals the loop's p_affine_0_* formula evaluated at ctr_0 = ctr_1 = 0
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1: macro vertex 0 + scaled offset towards macro vertex 2
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2: macro vertex 0 + both scaled offsets
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // Jacobian column 0 = point 1 - point 0
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; // Jacobian column 1 = point 2 - point 0
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 affine Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // NOTE(review): no guard against a zero determinant is visible here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // 2x2 inverse: swapped diagonal / negated off-diagonal, each divided by the determinant
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t - confirm a floating-point overload (e.g. std::abs from <cmath>) resolves here rather than int abs(int)
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; // tmp_moved_constant_*: sums/products of the inverse-Jacobian entries with the fixed factors 4, 8 and -8; none depends on a loop counter
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; // presumably consumed inside the loop that follows - their use is not visible in this excerpt
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0;
+       const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0;
+       const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16;
+       const real_t tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1;
+       const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19;
+       const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21;
+       const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0;
+       const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16;
+       const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_26 = -tmp_moved_constant_21 - tmp_moved_constant_4;
+       const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_28 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_29 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_4;
+       const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29;
+       const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1;
+       const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23;
+       const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5;
+       const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - tmp_moved_constant_19;
+       const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18;
+       const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0;
+                const real_t tmp_qloop_50 = jac_affine_inv_0_0_BLUE*tmp_qloop_49 + jac_affine_inv_1_0_BLUE*tmp_qloop_49;
+                const real_t tmp_qloop_54 = jac_affine_inv_0_1_BLUE*tmp_qloop_49 + jac_affine_inv_1_1_BLUE*tmp_qloop_49;
+                const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q];
+                const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_71 = tmp_qloop_70*2.0;
+                const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q];
+                const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_74 = tmp_qloop_73*2.0;
+                const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q];
+                const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0;
+                const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0;
+                const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0;
+                const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0;
+                const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0;
+                const real_t tmp_qloop_87 = jac_affine_inv_0_0_BLUE*tmp_qloop_86;
+                const real_t tmp_qloop_88 = jac_affine_inv_0_1_BLUE*tmp_qloop_86;
+                const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0;
+                const real_t tmp_qloop_94 = jac_affine_inv_1_0_BLUE*tmp_qloop_93;
+                const real_t tmp_qloop_95 = jac_affine_inv_1_1_BLUE*tmp_qloop_93;
+                const real_t tmp_qloop_104 = jac_affine_inv_1_0_BLUE*tmp_qloop_47;
+                const real_t tmp_qloop_105 = jac_affine_inv_0_0_BLUE*tmp_qloop_48;
+                const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105;
+                const real_t tmp_qloop_107 = jac_affine_inv_1_1_BLUE*tmp_qloop_47;
+                const real_t tmp_qloop_108 = jac_affine_inv_0_1_BLUE*tmp_qloop_48;
+                const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108;
+                const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_120 = jac_affine_inv_1_0_BLUE*tmp_qloop_119 - tmp_qloop_105;
+                const real_t tmp_qloop_121 = jac_affine_inv_1_1_BLUE*tmp_qloop_119 - tmp_qloop_108;
+                const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_130 = jac_affine_inv_0_0_BLUE*tmp_qloop_129 - tmp_qloop_104;
+                const real_t tmp_qloop_131 = jac_affine_inv_0_1_BLUE*tmp_qloop_129 - tmp_qloop_107;
+                const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_81 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q];
+                const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54));
+                const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88));
+                const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95));
+                const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109));
+                const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121));
+                const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131));
+                const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0;
+                const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1;
+                const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52;
+                const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52;
+                const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61;
+                const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61;
+                const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0;
+                const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57;
+                const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57;
+                const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1;
+                const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65;
+                const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65;
+                const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67);
+                const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88);
+                const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95);
+                const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67);
+                const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67);
+                const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67);
+                const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82;
+                const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89;
+                const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96;
+                const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82;
+                const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82;
+                const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82;
+                const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68;
+                const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89;
+                const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96;
+                const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133;
+                const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133;
+                const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133;
+                const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68;
+                const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89;
+                const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96;
+                const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134;
+                const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134;
+                const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134;
+                const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68;
+                const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89;
+                const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96;
+                const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135;
+                const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135;
+                const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135;
+                const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68;
+                const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89;
+                const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96;
+                const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136;
+                const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136;
+                const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136;
+                const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68;
+                const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89;
+                const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96;
+                const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137;
+                const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137;
+                const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7a8ddb719e52164207837821efd8d0e735bd9f1a
--- /dev/null
+++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp
@@ -0,0 +1,552 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusion.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusion::apply_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_1 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1;
+       const real_t tmp_qloop_3 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_4 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_5 = tmp_qloop_3 + tmp_qloop_4;
+       const real_t tmp_qloop_6 = jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_0_1_GRAY*tmp_qloop_5 + jac_affine_inv_1_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_5;
+       const real_t tmp_qloop_24 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0;
+       const real_t tmp_qloop_25 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0;
+       const real_t tmp_qloop_26 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_27 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_28 = jac_affine_inv_0_0_GRAY*tmp_qloop_26 + jac_affine_inv_0_1_GRAY*tmp_qloop_27;
+       const real_t tmp_qloop_29 = jac_affine_inv_1_0_GRAY*tmp_qloop_0 + jac_affine_inv_1_1_GRAY*tmp_qloop_3;
+       const real_t tmp_qloop_30 = jac_affine_inv_1_0_GRAY*(-tmp_qloop_0 - tmp_qloop_26) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_27 - tmp_qloop_3) - tmp_qloop_29;
+       const real_t tmp_qloop_31 = jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_1) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_4) - tmp_qloop_29;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0;
+                const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_12 = tmp_qloop_11*2.0;
+                const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q];
+                const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_15 = tmp_qloop_14*2.0;
+                const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q];
+                const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8;
+                const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7;
+                const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0;
+                const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0;
+                const real_t tmp_qloop_22 = abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q];
+                const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_9 + jac_affine_inv_1_0_GRAY*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_9));
+                const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0;
+                const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_GRAY*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_GRAY*tmp_qloop_21*tmp_qloop_32);
+                const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0;
+                const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_GRAY*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_GRAY*tmp_qloop_21*tmp_qloop_34);
+                const real_t tmp_qloop_36 = jac_affine_inv_1_0_GRAY*tmp_qloop_7;
+                const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*tmp_qloop_8;
+                const real_t tmp_qloop_38 = jac_affine_inv_1_1_GRAY*tmp_qloop_7;
+                const real_t tmp_qloop_39 = jac_affine_inv_0_1_GRAY*tmp_qloop_8;
+                const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39));
+                const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_GRAY*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_GRAY*tmp_qloop_41 - tmp_qloop_39));
+                const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_43 - tmp_qloop_38));
+                const real_t q_tmp_0_0 = tmp_qloop_23*tmp_qloop_6;
+                const real_t q_tmp_0_1 = tmp_qloop_23*tmp_qloop_24;
+                const real_t q_tmp_0_2 = tmp_qloop_23*tmp_qloop_25;
+                const real_t q_tmp_0_3 = tmp_qloop_23*tmp_qloop_28;
+                const real_t q_tmp_0_4 = tmp_qloop_23*tmp_qloop_30;
+                const real_t q_tmp_0_5 = tmp_qloop_23*tmp_qloop_31;
+                const real_t q_tmp_1_0 = tmp_qloop_33*tmp_qloop_6;
+                const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_33;
+                const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_33;
+                const real_t q_tmp_1_3 = tmp_qloop_28*tmp_qloop_33;
+                const real_t q_tmp_1_4 = tmp_qloop_30*tmp_qloop_33;
+                const real_t q_tmp_1_5 = tmp_qloop_31*tmp_qloop_33;
+                const real_t q_tmp_2_0 = tmp_qloop_35*tmp_qloop_6;
+                const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_35;
+                const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_35;
+                const real_t q_tmp_2_3 = tmp_qloop_28*tmp_qloop_35;
+                const real_t q_tmp_2_4 = tmp_qloop_30*tmp_qloop_35;
+                const real_t q_tmp_2_5 = tmp_qloop_31*tmp_qloop_35;
+                const real_t q_tmp_3_0 = tmp_qloop_40*tmp_qloop_6;
+                const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_40;
+                const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_40;
+                const real_t q_tmp_3_3 = tmp_qloop_28*tmp_qloop_40;
+                const real_t q_tmp_3_4 = tmp_qloop_30*tmp_qloop_40;
+                const real_t q_tmp_3_5 = tmp_qloop_31*tmp_qloop_40;
+                const real_t q_tmp_4_0 = tmp_qloop_42*tmp_qloop_6;
+                const real_t q_tmp_4_1 = tmp_qloop_24*tmp_qloop_42;
+                const real_t q_tmp_4_2 = tmp_qloop_25*tmp_qloop_42;
+                const real_t q_tmp_4_3 = tmp_qloop_28*tmp_qloop_42;
+                const real_t q_tmp_4_4 = tmp_qloop_30*tmp_qloop_42;
+                const real_t q_tmp_4_5 = tmp_qloop_31*tmp_qloop_42;
+                const real_t q_tmp_5_0 = tmp_qloop_44*tmp_qloop_6;
+                const real_t q_tmp_5_1 = tmp_qloop_24*tmp_qloop_44;
+                const real_t q_tmp_5_2 = tmp_qloop_25*tmp_qloop_44;
+                const real_t q_tmp_5_3 = tmp_qloop_28*tmp_qloop_44;
+                const real_t q_tmp_5_4 = tmp_qloop_30*tmp_qloop_44;
+                const real_t q_tmp_5_5 = tmp_qloop_31*tmp_qloop_44;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; /* BLUE setup, computed once before the BLUE element loop: reciprocal of micro_edges_per_macro_edge_float, used to scale the macro-vertex differences below */
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); /* macro vertex 0 plus one scaled step towards macro vertex 1, component 0 */
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); /* same, component 1 */
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); /* scaled offset from macro vertex 0 towards macro vertex 2, component 0 */
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); /* same, component 1 */
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; /* p_affine_const_<i>_<c>_BLUE: component c of vertex i of the triangle the Jacobian is built from */
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; /* 2x2 Jacobian: column 0 = vertex 1 - vertex 0, column 1 = vertex 2 - vertex 0 */
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; /* determinant of the Jacobian */
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); /* reciprocal of the determinant; there is no guard against a zero determinant here */
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; /* inverse Jacobian, formed as the adjugate divided by the determinant */
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); /* |det J|, multiplied into the quadrature weight in the BLUE q-loop. NOTE(review): unqualified abs on a real_t relies on a floating-point overload being visible here - confirm */
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; /* tmp_moved_constant_*: expressions in the inverse-Jacobian entries only, evaluated once here instead of per element/quadrature point */
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0;
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5; /* factor of q_tmp_*_0 in the BLUE q-loop */
+       const real_t tmp_moved_constant_7 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; /* factor of q_tmp_*_1 */
+       const real_t tmp_moved_constant_8 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; /* factor of q_tmp_*_2 */
+       const real_t tmp_moved_constant_9 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_11 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_9 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_10; /* factor of q_tmp_*_3 */
+       const real_t tmp_moved_constant_12 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_3; /* shared subexpression of constants 13 and 14 */
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_9) + jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_10 - tmp_moved_constant_3) - tmp_moved_constant_12; /* factor of q_tmp_*_4 */
+       const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_12; /* factor of q_tmp_*_5 */
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0;
+                const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_12 = tmp_qloop_11*2.0;
+                const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q];
+                const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_15 = tmp_qloop_14*2.0;
+                const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q];
+                const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8;
+                const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7;
+                const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0;
+                const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0;
+                const real_t tmp_qloop_22 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q];
+                const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_9 + jac_affine_inv_1_0_BLUE*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_9));
+                const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0;
+                const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_BLUE*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_BLUE*tmp_qloop_21*tmp_qloop_32);
+                const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0;
+                const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_BLUE*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_BLUE*tmp_qloop_21*tmp_qloop_34);
+                const real_t tmp_qloop_36 = jac_affine_inv_1_0_BLUE*tmp_qloop_7;
+                const real_t tmp_qloop_37 = jac_affine_inv_0_0_BLUE*tmp_qloop_8;
+                const real_t tmp_qloop_38 = jac_affine_inv_1_1_BLUE*tmp_qloop_7;
+                const real_t tmp_qloop_39 = jac_affine_inv_0_1_BLUE*tmp_qloop_8;
+                const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39));
+                const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_BLUE*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_BLUE*tmp_qloop_41 - tmp_qloop_39));
+                const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_43 - tmp_qloop_38));
+                const real_t q_tmp_0_0 = tmp_moved_constant_6*tmp_qloop_23;
+                const real_t q_tmp_0_1 = tmp_moved_constant_7*tmp_qloop_23;
+                const real_t q_tmp_0_2 = tmp_moved_constant_8*tmp_qloop_23;
+                const real_t q_tmp_0_3 = tmp_moved_constant_11*tmp_qloop_23;
+                const real_t q_tmp_0_4 = tmp_moved_constant_13*tmp_qloop_23;
+                const real_t q_tmp_0_5 = tmp_moved_constant_14*tmp_qloop_23;
+                const real_t q_tmp_1_0 = tmp_moved_constant_6*tmp_qloop_33;
+                const real_t q_tmp_1_1 = tmp_moved_constant_7*tmp_qloop_33;
+                const real_t q_tmp_1_2 = tmp_moved_constant_8*tmp_qloop_33;
+                const real_t q_tmp_1_3 = tmp_moved_constant_11*tmp_qloop_33;
+                const real_t q_tmp_1_4 = tmp_moved_constant_13*tmp_qloop_33;
+                const real_t q_tmp_1_5 = tmp_moved_constant_14*tmp_qloop_33;
+                const real_t q_tmp_2_0 = tmp_moved_constant_6*tmp_qloop_35;
+                const real_t q_tmp_2_1 = tmp_moved_constant_7*tmp_qloop_35;
+                const real_t q_tmp_2_2 = tmp_moved_constant_8*tmp_qloop_35;
+                const real_t q_tmp_2_3 = tmp_moved_constant_11*tmp_qloop_35;
+                const real_t q_tmp_2_4 = tmp_moved_constant_13*tmp_qloop_35;
+                const real_t q_tmp_2_5 = tmp_moved_constant_14*tmp_qloop_35;
+                const real_t q_tmp_3_0 = tmp_moved_constant_6*tmp_qloop_40;
+                const real_t q_tmp_3_1 = tmp_moved_constant_7*tmp_qloop_40;
+                const real_t q_tmp_3_2 = tmp_moved_constant_8*tmp_qloop_40;
+                const real_t q_tmp_3_3 = tmp_moved_constant_11*tmp_qloop_40;
+                const real_t q_tmp_3_4 = tmp_moved_constant_13*tmp_qloop_40;
+                const real_t q_tmp_3_5 = tmp_moved_constant_14*tmp_qloop_40;
+                const real_t q_tmp_4_0 = tmp_moved_constant_6*tmp_qloop_42;
+                const real_t q_tmp_4_1 = tmp_moved_constant_7*tmp_qloop_42;
+                const real_t q_tmp_4_2 = tmp_moved_constant_8*tmp_qloop_42;
+                const real_t q_tmp_4_3 = tmp_moved_constant_11*tmp_qloop_42;
+                const real_t q_tmp_4_4 = tmp_moved_constant_13*tmp_qloop_42;
+                const real_t q_tmp_4_5 = tmp_moved_constant_14*tmp_qloop_42;
+                const real_t q_tmp_5_0 = tmp_moved_constant_6*tmp_qloop_44;
+                const real_t q_tmp_5_1 = tmp_moved_constant_7*tmp_qloop_44;
+                const real_t q_tmp_5_2 = tmp_moved_constant_8*tmp_qloop_44;
+                const real_t q_tmp_5_3 = tmp_moved_constant_11*tmp_qloop_44;
+                const real_t q_tmp_5_4 = tmp_moved_constant_13*tmp_qloop_44;
+                const real_t q_tmp_5_5 = tmp_moved_constant_14*tmp_qloop_44;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..930e1257b8601da2a6c7186d9666453ef2c6f1dd
--- /dev/null
+++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp
@@ -0,0 +1,336 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusion.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_15 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_16 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_17 = tmp_qloop_15 + tmp_qloop_16;
+       const real_t tmp_qloop_18 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_19 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_20 = tmp_qloop_18 + tmp_qloop_19;
+       const real_t tmp_qloop_28 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_29 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_31 = jac_affine_inv_1_0_GRAY*tmp_qloop_15 + jac_affine_inv_1_1_GRAY*tmp_qloop_18;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*wx_dof_4 + tmp_qloop_11*wx_dof_5 + tmp_qloop_12*wx_dof_0 + tmp_qloop_3*wx_dof_3 + tmp_qloop_6*wx_dof_1 + tmp_qloop_9*wx_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*wy_dof_4 + tmp_qloop_11*wy_dof_5 + tmp_qloop_12*wy_dof_0 + tmp_qloop_3*wy_dof_3 + tmp_qloop_6*wy_dof_1 + tmp_qloop_9*wy_dof_2;
+                const real_t tmp_qloop_21 = abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_12 + diffusivity_times_delta_dof_1*tmp_qloop_6 + diffusivity_times_delta_dof_2*tmp_qloop_9 + diffusivity_times_delta_dof_3*tmp_qloop_3 + diffusivity_times_delta_dof_4*tmp_qloop_10 + diffusivity_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_22 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_26 = jac_affine_inv_1_1_GRAY*tmp_qloop_0;
+                const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1;
+                const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_32 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t q_tmp_0_0 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))*(jac_affine_inv_0_0_GRAY*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_20 + jac_affine_inv_1_0_GRAY*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_20);
+                const real_t q_tmp_1_1 = tmp_qloop_21*((jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0)*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_22 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_22);
+                const real_t q_tmp_2_2 = tmp_qloop_21*((jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0)*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_23 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_23);
+                const real_t q_tmp_3_3 = tmp_qloop_21*(jac_affine_inv_0_0_GRAY*tmp_qloop_28 + jac_affine_inv_0_1_GRAY*tmp_qloop_29)*(tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27));
+                const real_t q_tmp_4_4 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27))*(jac_affine_inv_1_0_GRAY*(-tmp_qloop_15 - tmp_qloop_28) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_18 - tmp_qloop_29) - tmp_qloop_31);
+                const real_t q_tmp_5_5 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_32 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_32 - tmp_qloop_26))*(jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_16) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_19) - tmp_qloop_31);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0;
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0;
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_3;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0;
+                const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q];
+                const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_8 = tmp_qloop_7*2.0;
+                const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q];
+                const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0;
+                const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0;
+                const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_13 = tmp_qloop_10*wx_dof_4 + tmp_qloop_11*wx_dof_5 + tmp_qloop_12*wx_dof_0 + tmp_qloop_3*wx_dof_3 + tmp_qloop_6*wx_dof_1 + tmp_qloop_9*wx_dof_2;
+                const real_t tmp_qloop_14 = tmp_qloop_10*wy_dof_4 + tmp_qloop_11*wy_dof_5 + tmp_qloop_12*wy_dof_0 + tmp_qloop_3*wy_dof_3 + tmp_qloop_6*wy_dof_1 + tmp_qloop_9*wy_dof_2;
+                const real_t tmp_qloop_21 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_12 + diffusivity_times_delta_dof_1*tmp_qloop_6 + diffusivity_times_delta_dof_2*tmp_qloop_9 + diffusivity_times_delta_dof_3*tmp_qloop_3 + diffusivity_times_delta_dof_4*tmp_qloop_10 + diffusivity_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q];
+                const real_t tmp_qloop_22 = tmp_qloop_0 - 1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_1 - 1.0;
+                const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0;
+                const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1;
+                const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_32 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t q_tmp_0_0 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2))*(jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5);
+                const real_t q_tmp_1_1 = tmp_qloop_21*((jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0)*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_22 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_22);
+                const real_t q_tmp_2_2 = tmp_qloop_21*((jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0)*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_23 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_23);
+                const real_t q_tmp_3_3 = tmp_qloop_21*(jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_7)*(tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27));
+                const real_t q_tmp_4_4 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27))*(jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_6) + jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_3 - tmp_moved_constant_7) - tmp_moved_constant_8);
+                const real_t q_tmp_5_5 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_32 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_32 - tmp_qloop_26))*(jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_8);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..abc9a3672135752f8af08677996ff7cd1603825e
--- /dev/null
+++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp
@@ -0,0 +1,710 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseSupgDiffusion.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseSupgDiffusion::toMatrix_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT  _data_diffusivity_times_deltaEdge, real_t * RESTRICT  _data_diffusivity_times_deltaVertex, idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_wxEdge, real_t * RESTRICT  _data_wxVertex, real_t * RESTRICT  _data_wyEdge, real_t * RESTRICT  _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = jac_affine_inv_0_0_GRAY*4.0;
+       const real_t tmp_qloop_1 = jac_affine_inv_1_0_GRAY*4.0;
+       const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1;
+       const real_t tmp_qloop_3 = jac_affine_inv_0_1_GRAY*4.0;
+       const real_t tmp_qloop_4 = jac_affine_inv_1_1_GRAY*4.0;
+       const real_t tmp_qloop_5 = tmp_qloop_3 + tmp_qloop_4;
+       const real_t tmp_qloop_6 = jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_0_1_GRAY*tmp_qloop_5 + jac_affine_inv_1_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_5;
+       const real_t tmp_qloop_24 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0;
+       const real_t tmp_qloop_25 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0;
+       const real_t tmp_qloop_26 = jac_affine_inv_1_0_GRAY*8.0;
+       const real_t tmp_qloop_27 = jac_affine_inv_1_1_GRAY*8.0;
+       const real_t tmp_qloop_28 = jac_affine_inv_0_0_GRAY*tmp_qloop_26 + jac_affine_inv_0_1_GRAY*tmp_qloop_27;
+       const real_t tmp_qloop_29 = jac_affine_inv_1_0_GRAY*tmp_qloop_0 + jac_affine_inv_1_1_GRAY*tmp_qloop_3;
+       const real_t tmp_qloop_30 = jac_affine_inv_1_0_GRAY*(-tmp_qloop_0 - tmp_qloop_26) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_27 - tmp_qloop_3) - tmp_qloop_29;
+       const real_t tmp_qloop_31 = jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_1) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_4) - tmp_qloop_29;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0;
+                const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_12 = tmp_qloop_11*2.0;
+                const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q];
+                const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_15 = tmp_qloop_14*2.0;
+                const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q];
+                const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8;
+                const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7;
+                const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0;
+                const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0;
+                const real_t tmp_qloop_22 = abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q];
+                const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_9 + jac_affine_inv_1_0_GRAY*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_9));
+                const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0;
+                const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_GRAY*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_GRAY*tmp_qloop_21*tmp_qloop_32);
+                const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0;
+                const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_GRAY*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_GRAY*tmp_qloop_21*tmp_qloop_34);
+                const real_t tmp_qloop_36 = jac_affine_inv_1_0_GRAY*tmp_qloop_7;
+                const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*tmp_qloop_8;
+                const real_t tmp_qloop_38 = jac_affine_inv_1_1_GRAY*tmp_qloop_7;
+                const real_t tmp_qloop_39 = jac_affine_inv_0_1_GRAY*tmp_qloop_8;
+                const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39));
+                const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_GRAY*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_GRAY*tmp_qloop_41 - tmp_qloop_39));
+                const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_43 - tmp_qloop_38));
+                const real_t q_tmp_0_0 = tmp_qloop_23*tmp_qloop_6;
+                const real_t q_tmp_0_1 = tmp_qloop_23*tmp_qloop_24;
+                const real_t q_tmp_0_2 = tmp_qloop_23*tmp_qloop_25;
+                const real_t q_tmp_0_3 = tmp_qloop_23*tmp_qloop_28;
+                const real_t q_tmp_0_4 = tmp_qloop_23*tmp_qloop_30;
+                const real_t q_tmp_0_5 = tmp_qloop_23*tmp_qloop_31;
+                const real_t q_tmp_1_0 = tmp_qloop_33*tmp_qloop_6;
+                const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_33;
+                const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_33;
+                const real_t q_tmp_1_3 = tmp_qloop_28*tmp_qloop_33;
+                const real_t q_tmp_1_4 = tmp_qloop_30*tmp_qloop_33;
+                const real_t q_tmp_1_5 = tmp_qloop_31*tmp_qloop_33;
+                const real_t q_tmp_2_0 = tmp_qloop_35*tmp_qloop_6;
+                const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_35;
+                const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_35;
+                const real_t q_tmp_2_3 = tmp_qloop_28*tmp_qloop_35;
+                const real_t q_tmp_2_4 = tmp_qloop_30*tmp_qloop_35;
+                const real_t q_tmp_2_5 = tmp_qloop_31*tmp_qloop_35;
+                const real_t q_tmp_3_0 = tmp_qloop_40*tmp_qloop_6;
+                const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_40;
+                const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_40;
+                const real_t q_tmp_3_3 = tmp_qloop_28*tmp_qloop_40;
+                const real_t q_tmp_3_4 = tmp_qloop_30*tmp_qloop_40;
+                const real_t q_tmp_3_5 = tmp_qloop_31*tmp_qloop_40;
+                const real_t q_tmp_4_0 = tmp_qloop_42*tmp_qloop_6;
+                const real_t q_tmp_4_1 = tmp_qloop_24*tmp_qloop_42;
+                const real_t q_tmp_4_2 = tmp_qloop_25*tmp_qloop_42;
+                const real_t q_tmp_4_3 = tmp_qloop_28*tmp_qloop_42;
+                const real_t q_tmp_4_4 = tmp_qloop_30*tmp_qloop_42;
+                const real_t q_tmp_4_5 = tmp_qloop_31*tmp_qloop_42;
+                const real_t q_tmp_5_0 = tmp_qloop_44*tmp_qloop_6;
+                const real_t q_tmp_5_1 = tmp_qloop_24*tmp_qloop_44;
+                const real_t q_tmp_5_2 = tmp_qloop_25*tmp_qloop_44;
+                const real_t q_tmp_5_3 = tmp_qloop_28*tmp_qloop_44;
+                const real_t q_tmp_5_4 = tmp_qloop_30*tmp_qloop_44;
+                const real_t q_tmp_5_5 = tmp_qloop_31*tmp_qloop_44;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0;
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0;
+       const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0;
+       const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5;
+       const real_t tmp_moved_constant_7 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_8 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_1_0_BLUE*8.0;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_1_1_BLUE*8.0;
+       const real_t tmp_moved_constant_11 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_9 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_10;
+       const real_t tmp_moved_constant_12 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_3;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_9) + jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_10 - tmp_moved_constant_3) - tmp_moved_constant_12;
+       const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_12;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q];
+                const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0;
+                const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                const real_t tmp_qloop_12 = tmp_qloop_11*2.0;
+                const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q];
+                const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                const real_t tmp_qloop_15 = tmp_qloop_14*2.0;
+                const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q];
+                const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8;
+                const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7;
+                const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0;
+                const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0;
+                const real_t tmp_qloop_22 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q];
+                const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_9 + jac_affine_inv_1_0_BLUE*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_9));
+                const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0;
+                const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_BLUE*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_BLUE*tmp_qloop_21*tmp_qloop_32);
+                const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0;
+                const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_BLUE*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_BLUE*tmp_qloop_21*tmp_qloop_34);
+                const real_t tmp_qloop_36 = jac_affine_inv_1_0_BLUE*tmp_qloop_7;
+                const real_t tmp_qloop_37 = jac_affine_inv_0_0_BLUE*tmp_qloop_8;
+                const real_t tmp_qloop_38 = jac_affine_inv_1_1_BLUE*tmp_qloop_7;
+                const real_t tmp_qloop_39 = jac_affine_inv_0_1_BLUE*tmp_qloop_8;
+                const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39));
+                const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0;
+                const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_BLUE*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_BLUE*tmp_qloop_41 - tmp_qloop_39));
+                const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0;
+                const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_43 - tmp_qloop_38));
+                const real_t q_tmp_0_0 = tmp_moved_constant_6*tmp_qloop_23;
+                const real_t q_tmp_0_1 = tmp_moved_constant_7*tmp_qloop_23;
+                const real_t q_tmp_0_2 = tmp_moved_constant_8*tmp_qloop_23;
+                const real_t q_tmp_0_3 = tmp_moved_constant_11*tmp_qloop_23;
+                const real_t q_tmp_0_4 = tmp_moved_constant_13*tmp_qloop_23;
+                const real_t q_tmp_0_5 = tmp_moved_constant_14*tmp_qloop_23;
+                const real_t q_tmp_1_0 = tmp_moved_constant_6*tmp_qloop_33;
+                const real_t q_tmp_1_1 = tmp_moved_constant_7*tmp_qloop_33;
+                const real_t q_tmp_1_2 = tmp_moved_constant_8*tmp_qloop_33;
+                const real_t q_tmp_1_3 = tmp_moved_constant_11*tmp_qloop_33;
+                const real_t q_tmp_1_4 = tmp_moved_constant_13*tmp_qloop_33;
+                const real_t q_tmp_1_5 = tmp_moved_constant_14*tmp_qloop_33;
+                const real_t q_tmp_2_0 = tmp_moved_constant_6*tmp_qloop_35;
+                const real_t q_tmp_2_1 = tmp_moved_constant_7*tmp_qloop_35;
+                const real_t q_tmp_2_2 = tmp_moved_constant_8*tmp_qloop_35;
+                const real_t q_tmp_2_3 = tmp_moved_constant_11*tmp_qloop_35;
+                const real_t q_tmp_2_4 = tmp_moved_constant_13*tmp_qloop_35;
+                const real_t q_tmp_2_5 = tmp_moved_constant_14*tmp_qloop_35;
+                const real_t q_tmp_3_0 = tmp_moved_constant_6*tmp_qloop_40;
+                const real_t q_tmp_3_1 = tmp_moved_constant_7*tmp_qloop_40;
+                const real_t q_tmp_3_2 = tmp_moved_constant_8*tmp_qloop_40;
+                const real_t q_tmp_3_3 = tmp_moved_constant_11*tmp_qloop_40;
+                const real_t q_tmp_3_4 = tmp_moved_constant_13*tmp_qloop_40;
+                const real_t q_tmp_3_5 = tmp_moved_constant_14*tmp_qloop_40;
+                const real_t q_tmp_4_0 = tmp_moved_constant_6*tmp_qloop_42;
+                const real_t q_tmp_4_1 = tmp_moved_constant_7*tmp_qloop_42;
+                const real_t q_tmp_4_2 = tmp_moved_constant_8*tmp_qloop_42;
+                const real_t q_tmp_4_3 = tmp_moved_constant_11*tmp_qloop_42;
+                const real_t q_tmp_4_4 = tmp_moved_constant_13*tmp_qloop_42;
+                const real_t q_tmp_4_5 = tmp_moved_constant_14*tmp_qloop_42;
+                const real_t q_tmp_5_0 = tmp_moved_constant_6*tmp_qloop_44;
+                const real_t q_tmp_5_1 = tmp_moved_constant_7*tmp_qloop_44;
+                const real_t q_tmp_5_2 = tmp_moved_constant_8*tmp_qloop_44;
+                const real_t q_tmp_5_3 = tmp_moved_constant_11*tmp_qloop_44;
+                const real_t q_tmp_5_4 = tmp_moved_constant_13*tmp_qloop_44;
+                const real_t q_tmp_5_5 = tmp_moved_constant_14*tmp_qloop_44;
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg